Dataset

  • Training HC (N=541)
  • KU HC (N=209)
  • CNUH patient (N=292)
    • TRS : 46
    • FE-SSD : 72
  • Input data : the 77 gray matter FreeSurfer features.
    1. ICV : 1 variable
    2. Subcortical volume : 8 variables
    3. Mean cortical thickness : 34 variables
    4. Mean cortical surface area : 34 variables
      Total 77 variables


Strategy of brain age prediction

  1. Sample exclusion criteria

    • Exclude samples below 18 years and above 75 years old.
    • Exclude samples included in scanning sites with less than 20 control samples.
  2. Data partitioning

    • Train HC Control : Healthy controls from CNUH+Beijing.
    • Test HC Control : Healthy control from KU.
    • Test Patient : Patient from CNUH.
  3. Model development

    • Perform Combat to remove site effect.
    • After removing site effect, build brain age prediction model based on the 77 gray matter FreeSurfer features using Ridge, SVR, RVR, 10-fold Nested cross-validation (10 time).
  4. Test prediction model in Test control data, CNUH patient using variance explained on cross-validation (VEcv), Mean absoulte error (MAE), Peason correlation coefficient (\(r\)).

  5. Calculate Brain-PAD (predicted Age - chronological Age) and corrected Brain-PAD (Goodness-of-fit test)


Data Process


library(openxlsx)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
whole_dat <- read.xlsx("2021-1174-BrainAge-sMRI-Final.xlsx",sheet=1,startRow = 1) #1174
Sample exclusion with Age & Site
# Age (filtering subject with age <18 or >75)
whole_dat$Age <- as.numeric(whole_dat$Age)
whole_dat$Age <- floor(whole_dat$Age)
sum(whole_dat$Age < 18 | whole_dat$Age > 75)
## [1] 94
table(whole_dat$Site[which(whole_dat$Age < 18 | whole_dat$Age > 75)])
## 
##  CNUH IPCAS   SWU 
##     1    92     1
whole_dat <- whole_dat[-which(whole_dat$Age < 18 | whole_dat$Age > 75),] #1075

# Site
table(whole_dat$Site)
## 
##    BNU   CNUH    HNU  IACAS  IPCAS   JHNU     KU    SWU XHCUMS 
##     56    582      3      2    100     30    214     70     23
whole_dat <- whole_dat[-which(whole_dat$Site=="HNU" | whole_dat$Site=="IACAS"),]

table(whole_dat$Site_2)
## 
##  BEI CNUH   KU 
##  279  582  214
table(whole_dat$Status)
## 
## Control Patient 
##     783     292
# Education
whole_dat$Edu[whole_dat$Edu==1 & !is.na(whole_dat$Edu)] <- 7.5
whole_dat$Edu[whole_dat$Edu==2 & !is.na(whole_dat$Edu)] <- 14
whole_dat$Edu[whole_dat$Edu==3 & !is.na(whole_dat$Edu)] <- 18
whole_dat$Edu <- as.numeric(whole_dat$Edu)
Reduce and seperation Reduce of dataset(KU reslut reference)
table(whole_dat$Status)
## 
## Control Patient 
##     783     292
table(whole_dat$Site_2)
## 
##  BEI CNUH   KU 
##  279  582  214
'%!in%' <- Negate('%in%')
whole_dat <- whole_dat %>% filter(whole_dat$Freesurfer_Number %!in% c("H148","H252","J373"))
whole_dat <- whole_dat %>% filter(whole_dat$Freesurfer_Number %!in% c("K33","K49","K65","K75","K216"))

HC_data <- whole_dat[whole_dat$Status=="Control",]
Patient_data <- setdiff(whole_dat,HC_data)

HC_data <- HC_data[,c(-2)]
Patient_data <- Patient_data[,c(-2)]


Combat


library(sva)
library(factoextra)

whole_dat$Site <- ifelse(whole_dat$Site=="CNUH","JBUH",whole_dat$Site)
whole_dat$Site <- ifelse(whole_dat$Site=="KU","KUAH",whole_dat$Site)

batch <- as.numeric(as.factor(whole_dat$Site))
mod <- model.matrix(~1+Age+Sex+Status,data=whole_dat)
Combat_dat <- ComBat(dat=t(whole_dat[,9:85]), batch=batch, mod=mod)
Combat_dat <- t(Combat_dat)
Combat_dat <- data.frame(SubjID=whole_dat$Freesurfer_Number,Age=whole_dat$Age,
                         Sex=whole_dat$Sex,Site=whole_dat$Site,Combat_dat)

Train_HC_data <- Combat_dat[grep("H|J",whole_dat$Freesurfer_Number),]
Test_HC_data <- Combat_dat[grep("K",whole_dat$Freesurfer_Number),]
Test_Patient_dat <- Combat_dat[grep("C|T",whole_dat$Freesurfer_Number),]

Patient_clinical <- read.xlsx("Patient_clinical_292_0630.xlsx",startRow=1)
Patient_clinical <- Patient_clinical[match(Test_Patient_dat$SubjID,Patient_clinical$freesurfer_ID),]
ID_FESSD <- Patient_clinical$freesurfer_ID[!is.na(Patient_clinical$`FE-SSD`)]
ID_schizo <- Patient_clinical$freesurfer_ID[Patient_clinical$Diagnosis=="schizophrenia"]

Test_TRS_dat <- Test_Patient_dat[grep("T",Test_Patient_dat$SubjID),]
Test_FESSD_dat <- Test_Patient_dat[Test_Patient_dat$SubjID %in% ID_FESSD,]
Test_schizophrenia_dat <- Test_Patient_dat[Test_Patient_dat$SubjID %in% ID_schizo,]

par(mfrow=c(3,2))
plot(density(Train_HC_data$Age,from=15,to=80),main="Age of Train control data")
plot(density(Test_HC_data$Age,from=15,to=80),main="Age of Test control data")
plot(density(Test_Patient_dat$Age,from=15,to=80),main="Age of Test patient data")
plot(density(Test_schizophrenia_dat$Age,from=15,to=80),main="Age of Schizophrenia data")
plot(density(Test_FESSD_dat$Age,from=15,to=80),main="Age of Test FE-SSD data")
plot(density(Test_TRS_dat$Age,from=15,to=80),main="Age of Test TRS data")

PC <- prcomp(scale(whole_dat[,9:85]))
A <- fviz_pca_ind(PC, label="none", habillage=whole_dat$Site,
     addEllipses=TRUE, ellipse.level=0.95, palette = "Dark2",title="Before adjustment") +
  labs(x="Dimension1 (29.0%)", y="Dimension2 (20.7%)")

PC <- prcomp(scale(Combat_dat[,5:81]))
B <- fviz_pca_ind(PC, label="none", habillage=Combat_dat$Site,
     addEllipses=TRUE, ellipse.level=0.95, palette = "Dark2",title="After adjustment") + 
  labs(x="Dimension1 (30.0%)", y="Dimension2 (20.8%)")

library(MASS)
f <- as.formula(paste("Site~",paste(colnames(whole_dat)[9:85],collapse ="+"),sep=""))
lda <- lda(f, data = whole_dat)
lda.value <- predict(lda)
lda.data <- data.frame(Site = whole_dat$Site, lda = lda.value$x)
C <- ggplot(lda.data) + geom_point(aes(lda.LD1, lda.LD2, colour = Site), size = 2.5)

lda <- lda(f, data = Combat_dat)
lda.value <- predict(lda)
lda.data <- data.frame(Site = Combat_dat$Site, lda = lda.value$x)
D <- ggplot(lda.data) + geom_point(aes(lda.LD1, lda.LD2, colour = Site), size = 2.5)
A;B;C;D

library(ggpubr)
png("s1.png", width = 1200, height = 800)
ggarrange(A,B,C,D, 
          labels = c("A","B","C","D"), 
          ncol = 2, nrow = 2,
          font.label = list(size=20))
dev.off()
## png 
##   2
# PCA outlier dection in Train HC data
PC <- prcomp(scale(Train_HC_data[,5:81]))
fviz_pca_ind(PC,col.ind = "#00AFBB",repel=TRUE)
## Warning: ggrepel: 466 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

fviz_pca_ind(PC, addEllipses=TRUE, ellipse.level=0.95, palette = "Dark2", title="") + labs(x="Dimension1 (30.3%)", y="Dimension2 (18.1%)")

out <- c(631,95,171,75,233,646,159,766,223,247,267,5,154,186,192,268,36,238,256,3,249,41,211,197,657) #1067

Train_HC_data$SubjID[rownames(Train_HC_data) %in% out]
##  [1] "H03"  "H05"  "H36"  "H41"  "H75"  "H95"  "H155" "H160" "H172" "H187"
## [11] "H193" "H198" "H212" "H224" "H234" "H239" "H248" "H268" "H250" "H260"
## [21] "H263" "J161" "J248" "J259" "J375"
length(out) # 25
## [1] 25
sum(!rownames(Train_HC_data) %in% out) # 566 --> 541
## [1] 541
Train_HC_data <- Train_HC_data[which(!rownames(Train_HC_data) %in% out),]


Demographics and Clinical characteristics


#Train_HC vs Test_HC vs Schizophrenia
summary_dat_3 <- data.frame(rbind(Train_HC_data,Test_HC_data,Test_schizophrenia_dat))
Edu_dat <- whole_dat[,c(1,6)]
colnames(Edu_dat)[1] <- c("SubjID")
summary_dat_3 <- inner_join(summary_dat_3,Edu_dat,by="SubjID")
summary_dat_3$set <- rep(c(1:3),c(nrow(Train_HC_data),nrow(Test_HC_data),nrow(Test_schizophrenia_dat)))
summary_dat_3$set <- as.factor(summary_dat_3$set)


three.fun_var <- function(var,group,DATA) {
  a11 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==1])),sep="")
  a21 <- paste(round(mean(DATA[var][DATA[group]==1],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==1],na.rm = T),2),sep = " ± ")
  a_1 <- paste(a21,"(",a11,")",sep="")
  
  a12 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==2])),sep="")
  a22 <- paste(round(mean(DATA[var][DATA[group]==2],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==2],na.rm = T),2),sep = " ± ")
  a_2 <-  paste(a22,"(",a12,")",sep="")
  
  a13 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==3])),sep="")
  a23 <- paste(round(mean(DATA[var][DATA[group]==3],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==3],na.rm = T),2),sep = " ± ")
  a_3 <-  paste(a23,"(",a13,")",sep="")
  
  f <- as.formula(paste(var,"~",group))
  Aov <- aov(lm(f,data=DATA))
  b <- round(summary(Aov)[[1]][["Pr(>F)"]][1],4)
  
  y <- DATA[var][,1]
  group <-DATA[group][,1]
  posthoc <- pairwise.t.test(y, group, "bonferroni")
  posthoc <- data.frame(posthoc[3])
  c <- round(posthoc[1,1],4)
  d <- round(posthoc[2,1],4)
  e <- round(posthoc[2,2],4)
  return(matrix(c(a_1,a_2,a_3,b,c,d,e),1,7))
}


TABLE_clinical_3 <- three.fun_var("Age","set",summary_dat_3) 
TABLE_clinical_3 <- rbind(TABLE_clinical_3,three.fun_var("Edu","set",summary_dat_3))
t <- table(summary_dat_3$Sex,summary_dat_3$set)
Sex <- c(paste(t[,1],collapse = "/"),paste(t[,2],collapse = "/"),paste(t[,3],collapse = "/"),round(chisq.test(t)$p.value,4))
Sex <- c(Sex,NA,NA,NA)
TABLE_clinical_3 <- rbind(Sex,TABLE_clinical_3)
rownames(TABLE_clinical_3) <- c("Sex(M/F)","Age","Education, years")
colnames(TABLE_clinical_3) <- c("Health control (CNUH, Beijing)","Health control (KU)","Schizophrenia (CNUH)","p-value","Group 1 vs Group 2", "Group 1 vs Group 3","Group 2 vs Group 3")
TABLE_clinical_3
##                  Health control (CNUH, Beijing) Health control (KU)    
## Sex(M/F)         "281/260"                      "77/132"               
## Age              "31.9 ± 13.7(n=541)"          "37.66 ± 14.66(n=209)"
## Education, years "13.94 ± 2.41(n=267)"         "13.83 ± 2.35(n=209)" 
##                  Schizophrenia (CNUH)   p-value  Group 1 vs Group 2
## Sex(M/F)         "108/88"               "2e-04"  NA                
## Age              "39.34 ± 12.9(n=196)" "0"      "0"               
## Education, years "13.19 ± 3.17(n=195)" "0.0074" "1"               
##                  Group 1 vs Group 3 Group 2 vs Group 3
## Sex(M/F)         NA                 NA                
## Age              "0"                "0.6544"          
## Education, years "0.0084"           "0.0473"
#TestHC vs FE-SSD vs TRS
summary_dat_p3 <- data.frame(rbind(Test_HC_data,Test_FESSD_dat,Test_TRS_dat))
summary_dat_p3 <- inner_join(summary_dat_p3,Edu_dat,by="SubjID")
summary_dat_p3$set <- rep(c(1:3),c(nrow(Test_HC_data),nrow(Test_FESSD_dat),nrow(Test_TRS_dat)))
summary_dat_p3$set <- as.factor(summary_dat_p3$set)


TABLE_clinical_p3 <- three.fun_var("Age","set",summary_dat_p3) 
TABLE_clinical_p3 <- rbind(TABLE_clinical_p3,three.fun_var("Edu","set",summary_dat_p3))
t <- table(summary_dat_p3$Sex,summary_dat_p3$set)
Sex <- c(paste(t[,1],collapse = "/"),paste(t[,2],collapse = "/"),paste(t[,3],collapse = "/"),round(chisq.test(t)$p.value,3))
Sex <- c(Sex,NA,NA,NA)
TABLE_clinical_p3 <- rbind(Sex,TABLE_clinical_p3)
rownames(TABLE_clinical_p3) <- c("Sex(M/F)","Age","Education, years")
colnames(TABLE_clinical_p3) <- c("Test HC","FE-SSD","TRS","p-value","Group 1 vs Group 2", "Group 1 vs Group 3","Group 2 vs Group 3")
TABLE_clinical_p3
##                  Test HC                 FE-SSD                
## Sex(M/F)         "77/132"                "28/44"               
## Age              "37.66 ± 14.66(n=209)" "29.61 ± 12.26(n=72)"
## Education, years "13.83 ± 2.35(n=209)"  "13.26 ± 2.79(n=72)" 
##                  TRS                   p-value  Group 1 vs Group 2
## Sex(M/F)         "30/16"               "0.002"  NA                
## Age              "42.61 ± 9.9(n=46)"  "0"      "1e-04"           
## Education, years "13.73 ± 2.36(n=46)" "0.2405" "0.277"           
##                  Group 1 vs Group 3 Group 2 vs Group 3
## Sex(M/F)         NA                 NA                
## Age              "0.0776"           "0"               
## Education, years "1"                "0.9508"


Comparison of the 77 gray matter Freesurfer features


three.fun_var <- function(var,group,DATA) {
  a11 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==1])),sep="")
  a21 <- paste(round(mean(DATA[var][DATA[group]==1],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==1],na.rm = T),2),sep = " ± ")
  a_1 <- paste(a21,"(",a11,")",sep="")
  
  a12 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==2])),sep="")
  a22 <- paste(round(mean(DATA[var][DATA[group]==2],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==2],na.rm = T),2),sep = " ± ")
  a_2 <-  paste(a22,"(",a12,")",sep="")
  
  a13 <- paste("n=",sum(!is.na(DATA[var][DATA[group]==3])),sep="")
  a23 <- paste(round(mean(DATA[var][DATA[group]==3],na.rm = T),2),
               round(sd(DATA[var][DATA[group]==3],na.rm = T),2),sep = " ± ")
  a_3 <-  paste(a23,"(",a13,")",sep="")
  
  f <- as.formula(paste(var,"~","set"))
  Aov <- aov(lm(f,data=DATA))
  b <- round(summary(Aov)[[1]][["Pr(>F)"]][1],4)

  y <- DATA[var][,1]
  group <-DATA[group][,1]
  posthoc <- pairwise.t.test(y, group, "bonferroni")
  posthoc <- data.frame(posthoc[3])
  c <- round(posthoc[1,1],4)
  d <- round(posthoc[2,1],4)
  e <- round(posthoc[2,2],4)
  
  f1 <- as.formula(paste(var,"~","Age + Sex + Edu + set"))
  Aov1 <- aov(lm(f1,data=DATA))
  f <- round(summary(Aov1)[[1]][["Pr(>F)"]][4],4)
  return(matrix(c(a_1,a_2,a_3,b,c,d,e,f),1,8))
}

conti_var <- setdiff(colnames(summary_dat_3),c("SubjID","Age","Site","Sex","set","Edu"))

TABLE_sMRI_3group <- NULL
for (i in 1:length(conti_var)){
  TABLE_sMRI_3group  <- rbind(TABLE_sMRI_3group,three.fun_var(conti_var[i],"set",summary_dat_3))
}
rownames(TABLE_sMRI_3group) <- conti_var
colnames(TABLE_sMRI_3group) <- c("Health control (CNUH, Beijing)","Health control (KU)","Schizophrenia (CNUH)","p-value","Group 1 vs Group 2", "Group 1 vs Group 3","Group 2 vs Group 3","age.sex.edu.adj-p")
TABLE_sMRI_3group <- data.frame(TABLE_sMRI_3group)
TABLE_sMRI_3group$p.value <- as.numeric(paste(TABLE_sMRI_3group$p.value))
TABLE_sMRI_3group$bonf.adj.p.value <- p.adjust(TABLE_sMRI_3group$p.value,method="bonferroni")
TABLE_sMRI_3group$bonf.age.sex.edu.adj.p.value <- p.adjust(TABLE_sMRI_3group$age.sex.edu.adj.p,method="bonferroni")
TABLE_sMRI_3group
##                                       Health.control..CNUH..Beijing.
## Avg_bankssts_thickavg                            2.62 ± 0.13(n=541)
## Avg_caudalanteriorcingulate_thickavg             2.71 ± 0.18(n=541)
## Avg_caudalmiddlefrontal_thickavg                 2.66 ± 0.11(n=541)
## Avg_cuneus_thickavg                               1.9 ± 0.11(n=541)
## Avg_entorhinal_thickavg                          3.47 ± 0.25(n=541)
## Avg_fusiform_thickavg                             2.8 ± 0.11(n=541)
## Avg_inferiorparietal_thickavg                    2.57 ± 0.11(n=541)
## Avg_inferiortemporal_thickavg                    2.87 ± 0.11(n=541)
## Avg_isthmuscingulate_thickavg                    2.39 ± 0.16(n=541)
## Avg_lateraloccipital_thickavg                     2.31 ± 0.1(n=541)
## Avg_lateralorbitofrontal_thickavg                2.74 ± 0.12(n=541)
## Avg_lingual_thickavg                             2.01 ± 0.11(n=541)
## Avg_medialorbitofrontal_thickavg                 2.58 ± 0.13(n=541)
## Avg_middletemporal_thickavg                      2.98 ± 0.12(n=541)
## Avg_parahippocampal_thickavg                     2.69 ± 0.21(n=541)
## Avg_paracentral_thickavg                         2.51 ± 0.13(n=541)
## Avg_parsopercularis_thickavg                     2.68 ± 0.13(n=541)
## Avg_parsorbitalis_thickavg                       2.81 ± 0.15(n=541)
## Avg_parstriangularis_thickavg                    2.58 ± 0.12(n=541)
## Avg_pericalcarine_thickavg                       1.62 ± 0.11(n=541)
## Avg_postcentral_thickavg                         2.12 ± 0.11(n=541)
## Avg_posteriorcingulate_thickavg                  2.57 ± 0.12(n=541)
## Avg_precentral_thickavg                          2.62 ± 0.12(n=541)
## Avg_precuneus_thickavg                           2.45 ± 0.11(n=541)
## Avg_rostralanteriorcingulate_thickavg            2.95 ± 0.17(n=541)
## Avg_rostralmiddlefrontal_thickavg                2.51 ± 0.11(n=541)
## Avg_superiorfrontal_thickavg                     2.88 ± 0.12(n=541)
## Avg_superiorparietal_thickavg                     2.28 ± 0.1(n=541)
## Avg_superiortemporal_thickavg                    2.89 ± 0.14(n=541)
## Avg_supramarginal_thickavg                       2.63 ± 0.12(n=541)
## Avg_frontalpole_thickavg                         2.98 ± 0.24(n=541)
## Avg_temporalpole_thickavg                        3.75 ± 0.22(n=541)
## Avg_transversetemporal_thickavg                  2.34 ± 0.17(n=541)
## Avg_insula_thickavg                              3.04 ± 0.15(n=541)
## Avg_bankssts_surfavg                          967.27 ± 121.6(n=541)
## Avg_caudalanteriorcingulate_surfavg          644.41 ± 107.83(n=541)
## Avg_caudalmiddlefrontal_surfavg             2161.25 ± 303.97(n=541)
## Avg_cuneus_surfavg                           1625.59 ± 190.3(n=541)
## Avg_entorhinal_surfavg                        419.65 ± 68.17(n=541)
## Avg_fusiform_surfavg                        3129.58 ± 322.75(n=541)
## Avg_inferiorparietal_surfavg                5215.98 ± 633.84(n=541)
## Avg_inferiortemporal_surfavg                3491.61 ± 427.49(n=541)
## Avg_isthmuscingulate_surfavg                1007.55 ± 132.69(n=541)
## Avg_lateraloccipital_surfavg                5117.15 ± 576.14(n=541)
## Avg_lateralorbitofrontal_surfavg            2585.99 ± 250.04(n=541)
## Avg_lingual_surfavg                         3230.64 ± 386.62(n=541)
## Avg_medialorbitofrontal_surfavg             1912.96 ± 187.66(n=541)
## Avg_middletemporal_surfavg                  3474.81 ± 400.54(n=541)
## Avg_parahippocampal_surfavg                    662.72 ± 69.8(n=541)
## Avg_paracentral_surfavg                     1431.88 ± 146.87(n=541)
## Avg_parsopercularis_surfavg                 1504.13 ± 196.39(n=541)
## Avg_parsorbitalis_surfavg                     727.44 ± 80.59(n=541)
## Avg_parstriangularis_surfavg                 1450.1 ± 191.15(n=541)
## Avg_pericalcarine_surfavg                   1565.32 ± 244.62(n=541)
## Avg_postcentral_surfavg                     4193.87 ± 424.74(n=541)
## Avg_posteriorcingulate_surfavg              1200.54 ± 149.72(n=541)
## Avg_precentral_surfavg                      4977.14 ± 459.71(n=541)
## Avg_precuneus_surfavg                       4070.86 ± 458.89(n=541)
## Avg_rostralanteriorcingulate_surfavg          706.8 ± 108.98(n=541)
## Avg_rostralmiddlefrontal_surfavg            5648.49 ± 717.16(n=541)
## Avg_superiorfrontal_surfavg                  6839.4 ± 713.29(n=541)
## Avg_superiorparietal_surfavg                5491.92 ± 592.31(n=541)
## Avg_superiortemporal_surfavg                3876.63 ± 386.45(n=541)
## Avg_supramarginal_surfavg                   3951.13 ± 524.18(n=541)
## Avg_frontalpole_surfavg                       276.89 ± 25.98(n=541)
## Avg_temporalpole_surfavg                      473.63 ± 49.32(n=541)
## Avg_transversetemporal_surfavg                413.02 ± 52.01(n=541)
## Avg_insula_surfavg                          2458.06 ± 231.67(n=541)
## ICV                                   1510615.35 ± 156339.76(n=541)
## Avg_LatVent                                7519.75 ± 4124.85(n=541)
## Avg_thal                                    8094.88 ± 843.53(n=541)
## Avg_caud                                    3551.91 ± 456.96(n=541)
## Avg_put                                      5145.54 ± 589.4(n=541)
## Avg_pal                                     2020.35 ± 230.43(n=541)
## Avg_hippo                                   4275.86 ± 370.71(n=541)
## Avg_amyg                                    1760.78 ± 188.58(n=541)
## Avg_accumb                                     482.6 ± 81.35(n=541)
##                                                  Health.control..KU.
## Avg_bankssts_thickavg                             2.6 ± 0.13(n=209)
## Avg_caudalanteriorcingulate_thickavg             2.69 ± 0.18(n=209)
## Avg_caudalmiddlefrontal_thickavg                 2.64 ± 0.13(n=209)
## Avg_cuneus_thickavg                              1.89 ± 0.11(n=209)
## Avg_entorhinal_thickavg                          3.46 ± 0.26(n=209)
## Avg_fusiform_thickavg                            2.78 ± 0.13(n=209)
## Avg_inferiorparietal_thickavg                    2.55 ± 0.11(n=209)
## Avg_inferiortemporal_thickavg                    2.85 ± 0.13(n=209)
## Avg_isthmuscingulate_thickavg                    2.37 ± 0.17(n=209)
## Avg_lateraloccipital_thickavg                    2.29 ± 0.11(n=209)
## Avg_lateralorbitofrontal_thickavg                2.71 ± 0.15(n=209)
## Avg_lingual_thickavg                             1.99 ± 0.11(n=209)
## Avg_medialorbitofrontal_thickavg                 2.55 ± 0.15(n=209)
## Avg_middletemporal_thickavg                      2.95 ± 0.13(n=209)
## Avg_parahippocampal_thickavg                     2.68 ± 0.22(n=209)
## Avg_paracentral_thickavg                         2.49 ± 0.14(n=209)
## Avg_parsopercularis_thickavg                     2.65 ± 0.13(n=209)
## Avg_parsorbitalis_thickavg                       2.78 ± 0.18(n=209)
## Avg_parstriangularis_thickavg                    2.55 ± 0.14(n=209)
## Avg_pericalcarine_thickavg                        1.6 ± 0.11(n=209)
## Avg_postcentral_thickavg                          2.1 ± 0.11(n=209)
## Avg_posteriorcingulate_thickavg                  2.55 ± 0.13(n=209)
## Avg_precentral_thickavg                           2.6 ± 0.13(n=209)
## Avg_precuneus_thickavg                           2.43 ± 0.12(n=209)
## Avg_rostralanteriorcingulate_thickavg            2.93 ± 0.17(n=209)
## Avg_rostralmiddlefrontal_thickavg                2.49 ± 0.12(n=209)
## Avg_superiorfrontal_thickavg                     2.86 ± 0.14(n=209)
## Avg_superiorparietal_thickavg                    2.27 ± 0.11(n=209)
## Avg_superiortemporal_thickavg                    2.85 ± 0.14(n=209)
## Avg_supramarginal_thickavg                        2.6 ± 0.13(n=209)
## Avg_frontalpole_thickavg                         2.95 ± 0.24(n=209)
## Avg_temporalpole_thickavg                        3.73 ± 0.22(n=209)
## Avg_transversetemporal_thickavg                   2.3 ± 0.19(n=209)
## Avg_insula_thickavg                                 3 ± 0.16(n=209)
## Avg_bankssts_surfavg                            944.62 ± 136(n=209)
## Avg_caudalanteriorcingulate_surfavg          630.33 ± 121.25(n=209)
## Avg_caudalmiddlefrontal_surfavg             2103.13 ± 325.89(n=209)
## Avg_cuneus_surfavg                           1592.88 ± 209.9(n=209)
## Avg_entorhinal_surfavg                        416.99 ± 69.42(n=209)
## Avg_fusiform_surfavg                        3068.67 ± 381.86(n=209)
## Avg_inferiorparietal_surfavg                5088.47 ± 692.61(n=209)
## Avg_inferiortemporal_surfavg                3417.91 ± 478.98(n=209)
## Avg_isthmuscingulate_surfavg                 983.79 ± 146.68(n=209)
## Avg_lateraloccipital_surfavg                5005.72 ± 643.03(n=209)
## Avg_lateralorbitofrontal_surfavg            2541.39 ± 281.62(n=209)
## Avg_lingual_surfavg                         3174.18 ± 405.57(n=209)
## Avg_medialorbitofrontal_surfavg             1886.85 ± 214.77(n=209)
## Avg_middletemporal_surfavg                  3385.71 ± 459.17(n=209)
## Avg_parahippocampal_surfavg                   652.04 ± 78.56(n=209)
## Avg_paracentral_surfavg                     1416.41 ± 165.77(n=209)
## Avg_parsopercularis_surfavg                 1472.68 ± 208.32(n=209)
## Avg_parsorbitalis_surfavg                     712.87 ± 90.11(n=209)
## Avg_parstriangularis_surfavg                1413.56 ± 213.86(n=209)
## Avg_pericalcarine_surfavg                   1542.17 ± 254.94(n=209)
## Avg_postcentral_surfavg                     4126.92 ± 478.16(n=209)
## Avg_posteriorcingulate_surfavg              1172.77 ± 170.94(n=209)
## Avg_precentral_surfavg                      4899.29 ± 520.94(n=209)
## Avg_precuneus_surfavg                       3979.26 ± 524.37(n=209)
## Avg_rostralanteriorcingulate_surfavg          690.54 ± 131.3(n=209)
## Avg_rostralmiddlefrontal_surfavg            5484.98 ± 842.56(n=209)
## Avg_superiorfrontal_surfavg                 6686.19 ± 850.77(n=209)
## Avg_superiorparietal_surfavg                5374.61 ± 669.01(n=209)
## Avg_superiortemporal_surfavg                 3796.1 ± 459.01(n=209)
## Avg_supramarginal_surfavg                     3864.8 ± 593.6(n=209)
## Avg_frontalpole_surfavg                       272.24 ± 29.71(n=209)
## Avg_temporalpole_surfavg                       469.5 ± 51.02(n=209)
## Avg_transversetemporal_surfavg                405.28 ± 56.65(n=209)
## Avg_insula_surfavg                          2426.56 ± 268.11(n=209)
## ICV                                   1478544.66 ± 178969.65(n=209)
## Avg_LatVent                                7857.61 ± 4649.91(n=209)
## Avg_thal                                    7870.15 ± 876.73(n=209)
## Avg_caud                                        3456 ± 460.1(n=209)
## Avg_put                                     4985.31 ± 618.35(n=209)
## Avg_pal                                     1983.49 ± 233.67(n=209)
## Avg_hippo                                   4193.14 ± 401.99(n=209)
## Avg_amyg                                    1719.96 ± 205.86(n=209)
## Avg_accumb                                    466.86 ± 84.93(n=209)
##                                                 Schizophrenia..CNUH. p.value
## Avg_bankssts_thickavg                            2.52 ± 0.12(n=196)  0.0000
## Avg_caudalanteriorcingulate_thickavg             2.65 ± 0.19(n=196)  0.0014
## Avg_caudalmiddlefrontal_thickavg                 2.54 ± 0.12(n=196)  0.0000
## Avg_cuneus_thickavg                              1.86 ± 0.11(n=196)  0.0000
## Avg_entorhinal_thickavg                           3.4 ± 0.25(n=196)  0.0102
## Avg_fusiform_thickavg                            2.71 ± 0.12(n=196)  0.0000
## Avg_inferiorparietal_thickavg                    2.47 ± 0.11(n=196)  0.0000
## Avg_inferiortemporal_thickavg                    2.78 ± 0.12(n=196)  0.0000
## Avg_isthmuscingulate_thickavg                    2.28 ± 0.16(n=196)  0.0000
## Avg_lateraloccipital_thickavg                     2.22 ± 0.1(n=196)  0.0000
## Avg_lateralorbitofrontal_thickavg                2.61 ± 0.13(n=196)  0.0000
## Avg_lingual_thickavg                              1.98 ± 0.1(n=196)  0.0014
## Avg_medialorbitofrontal_thickavg                 2.47 ± 0.14(n=196)  0.0000
## Avg_middletemporal_thickavg                      2.86 ± 0.13(n=196)  0.0000
## Avg_parahippocampal_thickavg                      2.6 ± 0.23(n=196)  0.0000
## Avg_paracentral_thickavg                         2.44 ± 0.12(n=196)  0.0000
## Avg_parsopercularis_thickavg                     2.55 ± 0.14(n=196)  0.0000
## Avg_parsorbitalis_thickavg                       2.64 ± 0.16(n=196)  0.0000
## Avg_parstriangularis_thickavg                    2.45 ± 0.12(n=196)  0.0000
## Avg_pericalcarine_thickavg                        1.59 ± 0.1(n=196)  0.0036
## Avg_postcentral_thickavg                         2.03 ± 0.09(n=196)  0.0000
## Avg_posteriorcingulate_thickavg                  2.49 ± 0.14(n=196)  0.0000
## Avg_precentral_thickavg                          2.52 ± 0.13(n=196)  0.0000
## Avg_precuneus_thickavg                           2.37 ± 0.11(n=196)  0.0000
## Avg_rostralanteriorcingulate_thickavg            2.84 ± 0.18(n=196)  0.0000
## Avg_rostralmiddlefrontal_thickavg                 2.37 ± 0.1(n=196)  0.0000
## Avg_superiorfrontal_thickavg                     2.76 ± 0.12(n=196)  0.0000
## Avg_superiorparietal_thickavg                      2.2 ± 0.1(n=196)  0.0000
## Avg_superiortemporal_thickavg                    2.74 ± 0.14(n=196)  0.0000
## Avg_supramarginal_thickavg                       2.52 ± 0.12(n=196)  0.0000
## Avg_frontalpole_thickavg                         2.79 ± 0.23(n=196)  0.0000
## Avg_temporalpole_thickavg                        3.62 ± 0.24(n=196)  0.0000
## Avg_transversetemporal_thickavg                  2.28 ± 0.17(n=196)  0.0002
## Avg_insula_thickavg                              2.94 ± 0.14(n=196)  0.0000
## Avg_bankssts_surfavg                         948.37 ± 137.03(n=196)  0.0454
## Avg_caudalanteriorcingulate_surfavg          596.52 ± 107.37(n=196)  0.0000
## Avg_caudalmiddlefrontal_surfavg             2101.64 ± 315.48(n=196)  0.0162
## Avg_cuneus_surfavg                           1636.5 ± 211.71(n=196)  0.0605
## Avg_entorhinal_surfavg                        417.42 ± 66.11(n=196)  0.8584
## Avg_fusiform_surfavg                        3001.55 ± 359.04(n=196)  0.0000
## Avg_inferiorparietal_surfavg                5121.09 ± 660.05(n=196)  0.0300
## Avg_inferiortemporal_surfavg                3435.33 ± 456.73(n=196)  0.0775
## Avg_isthmuscingulate_surfavg                1010.33 ± 140.58(n=196)  0.0739
## Avg_lateraloccipital_surfavg                 5049.48 ± 632.7(n=196)  0.0579
## Avg_lateralorbitofrontal_surfavg            2598.42 ± 291.28(n=196)  0.0616
## Avg_lingual_surfavg                         3170.28 ± 397.74(n=196)  0.0782
## Avg_medialorbitofrontal_surfavg             1900.63 ± 214.92(n=196)  0.2630
## Avg_middletemporal_surfavg                  3414.26 ± 440.77(n=196)  0.0204
## Avg_parahippocampal_surfavg                    643.1 ± 75.17(n=196)  0.0036
## Avg_paracentral_surfavg                     1442.89 ± 171.22(n=196)  0.2275
## Avg_parsopercularis_surfavg                 1451.08 ± 222.88(n=196)  0.0046
## Avg_parsorbitalis_surfavg                     732.71 ± 96.61(n=196)  0.0474
## Avg_parstriangularis_surfavg                1424.67 ± 222.33(n=196)  0.0566
## Avg_pericalcarine_surfavg                   1553.11 ± 243.39(n=196)  0.4938
## Avg_postcentral_surfavg                     4160.13 ± 468.26(n=196)  0.1682
## Avg_posteriorcingulate_surfavg              1165.58 ± 169.97(n=196)  0.0105
## Avg_precentral_surfavg                      4964.07 ± 518.41(n=196)  0.1420
## Avg_precuneus_surfavg                       4018.68 ± 495.53(n=196)  0.0521
## Avg_rostralanteriorcingulate_surfavg         677.15 ± 128.99(n=196)  0.0076
## Avg_rostralmiddlefrontal_surfavg             5730.9 ± 862.22(n=196)  0.0045
## Avg_superiorfrontal_surfavg                  6803.96 ± 833.8(n=196)  0.0512
## Avg_superiorparietal_surfavg                 5439.7 ± 629.39(n=196)  0.0615
## Avg_superiortemporal_surfavg                3799.18 ± 432.68(n=196)  0.0147
## Avg_supramarginal_surfavg                   3906.13 ± 564.42(n=196)  0.1387
## Avg_frontalpole_surfavg                        276.3 ± 32.03(n=196)  0.1222
## Avg_temporalpole_surfavg                       478.9 ± 54.22(n=196)  0.1754
## Avg_transversetemporal_surfavg                393.96 ± 55.11(n=196)  0.0001
## Avg_insula_surfavg                          2415.61 ± 261.63(n=196)  0.0690
## ICV                                   1508999.89 ± 155566.37(n=196)  0.0440
## Avg_LatVent                                9878.63 ± 5154.56(n=196)  0.0000
## Avg_thal                                    7226.18 ± 880.94(n=196)  0.0000
## Avg_caud                                    3388.83 ± 424.66(n=196)  0.0000
## Avg_put                                     4968.01 ± 637.48(n=196)  0.0001
## Avg_pal                                        2097 ± 234.66(n=196)  0.0000
## Avg_hippo                                   3950.52 ± 434.63(n=196)  0.0000
## Avg_amyg                                    1664.61 ± 229.07(n=196)  0.0000
## Avg_accumb                                    453.43 ± 84.76(n=196)  0.0001
##                                       Group.1.vs.Group.2 Group.1.vs.Group.3
## Avg_bankssts_thickavg                             0.1717                  0
## Avg_caudalanteriorcingulate_thickavg              0.3901             0.0011
## Avg_caudalmiddlefrontal_thickavg                  0.0705                  0
## Avg_cuneus_thickavg                               0.1587                  0
## Avg_entorhinal_thickavg                                1             0.0078
## Avg_fusiform_thickavg                             0.0701                  0
## Avg_inferiorparietal_thickavg                       0.04                  0
## Avg_inferiortemporal_thickavg                     0.2616                  0
## Avg_isthmuscingulate_thickavg                     0.1973                  0
## Avg_lateraloccipital_thickavg                     0.3478                  0
## Avg_lateralorbitofrontal_thickavg                 0.0141                  0
## Avg_lingual_thickavg                              0.0874             0.0021
## Avg_medialorbitofrontal_thickavg                  0.0326                  0
## Avg_middletemporal_thickavg                       0.0696                  0
## Avg_parahippocampal_thickavg                           1                  0
## Avg_paracentral_thickavg                          0.1967                  0
## Avg_parsopercularis_thickavg                      0.0095                  0
## Avg_parsorbitalis_thickavg                        0.0879                  0
## Avg_parstriangularis_thickavg                      0.032                  0
## Avg_pericalcarine_thickavg                         0.322             0.0032
## Avg_postcentral_thickavg                          0.2203                  0
## Avg_posteriorcingulate_thickavg                   0.0491                  0
## Avg_precentral_thickavg                           0.0869                  0
## Avg_precuneus_thickavg                              0.04                  0
## Avg_rostralanteriorcingulate_thickavg             0.1255                  0
## Avg_rostralmiddlefrontal_thickavg                 0.1163                  0
## Avg_superiorfrontal_thickavg                      0.0527                  0
## Avg_superiorparietal_thickavg                     0.2515                  0
## Avg_superiortemporal_thickavg                      0.009                  0
## Avg_supramarginal_thickavg                        0.0481                  0
## Avg_frontalpole_thickavg                          0.4658                  0
## Avg_temporalpole_thickavg                         0.8541                  0
## Avg_transversetemporal_thickavg                   0.0763              2e-04
## Avg_insula_thickavg                               0.0015                  0
## Avg_bankssts_surfavg                              0.0909             0.2319
## Avg_caudalanteriorcingulate_surfavg               0.3571                  0
## Avg_caudalmiddlefrontal_surfavg                   0.0663             0.0655
## Avg_cuneus_surfavg                                0.1326                  1
## Avg_entorhinal_surfavg                                 1                  1
## Avg_fusiform_surfavg                              0.0901                  0
## Avg_inferiorparietal_surfavg                      0.0499             0.2444
## Avg_inferiortemporal_surfavg                      0.1274             0.3898
## Avg_isthmuscingulate_surfavg                      0.1025                  1
## Avg_lateraloccipital_surfavg                      0.0707             0.5364
## Avg_lateralorbitofrontal_surfavg                  0.1197                  1
## Avg_lingual_surfavg                               0.2346             0.1977
## Avg_medialorbitofrontal_surfavg                   0.3265                  1
## Avg_middletemporal_surfavg                        0.0293             0.2578
## Avg_parahippocampal_surfavg                       0.2176             0.0039
## Avg_paracentral_surfavg                           0.6746                  1
## Avg_parsopercularis_surfavg                        0.179             0.0058
## Avg_parsorbitalis_surfavg                         0.1152                  1
## Avg_parstriangularis_surfavg                      0.0821                0.4
## Avg_pericalcarine_surfavg                         0.7484                  1
## Avg_postcentral_surfavg                           0.1972                  1
## Avg_posteriorcingulate_surfavg                    0.0964             0.0254
## Avg_precentral_surfavg                            0.1489                  1
## Avg_precuneus_surfavg                             0.0593             0.5825
## Avg_rostralanteriorcingulate_surfavg               0.277             0.0083
## Avg_rostralmiddlefrontal_surfavg                    0.03             0.6122
## Avg_superiorfrontal_surfavg                       0.0447                  1
## Avg_superiorparietal_surfavg                      0.0598             0.9327
## Avg_superiortemporal_surfavg                      0.0507             0.0743
## Avg_supramarginal_surfavg                         0.1609             0.9764
## Avg_frontalpole_surfavg                           0.1283                  1
## Avg_temporalpole_surfavg                          0.9549             0.6381
## Avg_transversetemporal_surfavg                    0.2309              1e-04
## Avg_insula_surfavg                                0.3511             0.1173
## ICV                                               0.0447                  1
## Avg_LatVent                                            1                  0
## Avg_thal                                          0.0041                  0
## Avg_caud                                          0.0276                  0
## Avg_put                                           0.0036             0.0014
## Avg_pal                                           0.1542              2e-04
## Avg_hippo                                         0.0289                  0
## Avg_amyg                                          0.0389                  0
## Avg_accumb                                        0.0598              1e-04
##                                       Group.2.vs.Group.3 age.sex.edu.adj.p
## Avg_bankssts_thickavg                                  0                 0
## Avg_caudalanteriorcingulate_thickavg              0.2333            0.0409
## Avg_caudalmiddlefrontal_thickavg                       0                 0
## Avg_cuneus_thickavg                               0.0342            0.0028
## Avg_entorhinal_thickavg                            0.106             0.009
## Avg_fusiform_thickavg                                  0                 0
## Avg_inferiorparietal_thickavg                          0                 0
## Avg_inferiortemporal_thickavg                          0                 0
## Avg_isthmuscingulate_thickavg                          0                 0
## Avg_lateraloccipital_thickavg                          0                 0
## Avg_lateralorbitofrontal_thickavg                      0                 0
## Avg_lingual_thickavg                              0.8635            0.1227
## Avg_medialorbitofrontal_thickavg                       0                 0
## Avg_middletemporal_thickavg                            0                 0
## Avg_parahippocampal_thickavg                      0.0015            0.0013
## Avg_paracentral_thickavg                           1e-04                 0
## Avg_parsopercularis_thickavg                           0                 0
## Avg_parsorbitalis_thickavg                             0                 0
## Avg_parstriangularis_thickavg                          0                 0
## Avg_pericalcarine_thickavg                        0.4597            0.0384
## Avg_postcentral_thickavg                               0                 0
## Avg_posteriorcingulate_thickavg                        0                 0
## Avg_precentral_thickavg                                0                 0
## Avg_precuneus_thickavg                                 0                 0
## Avg_rostralanteriorcingulate_thickavg                  0                 0
## Avg_rostralmiddlefrontal_thickavg                      0                 0
## Avg_superiorfrontal_thickavg                           0                 0
## Avg_superiorparietal_thickavg                          0                 0
## Avg_superiortemporal_thickavg                          0                 0
## Avg_supramarginal_thickavg                             0                 0
## Avg_frontalpole_thickavg                               0                 0
## Avg_temporalpole_thickavg                              0                 0
## Avg_transversetemporal_thickavg                    0.376            0.1047
## Avg_insula_thickavg                                1e-04                 0
## Avg_bankssts_surfavg                                   1            0.7216
## Avg_caudalanteriorcingulate_surfavg               0.0067                 0
## Avg_caudalmiddlefrontal_surfavg                        1            0.5458
## Avg_cuneus_surfavg                                 0.084            0.3083
## Avg_entorhinal_surfavg                                 1            0.6834
## Avg_fusiform_surfavg                              0.1505            0.0011
## Avg_inferiorparietal_surfavg                           1            0.8124
## Avg_inferiortemporal_surfavg                           1             0.499
## Avg_isthmuscingulate_surfavg                      0.1578            0.4455
## Avg_lateraloccipital_surfavg                           1            0.7861
## Avg_lateralorbitofrontal_surfavg                  0.0942            0.1936
## Avg_lingual_surfavg                                    1            0.6034
## Avg_medialorbitofrontal_surfavg                        1            0.5111
## Avg_middletemporal_surfavg                             1            0.7006
## Avg_parahippocampal_surfavg                       0.6532              0.05
## Avg_paracentral_surfavg                            0.267            0.6824
## Avg_parsopercularis_surfavg                       0.8668            0.1408
## Avg_parsorbitalis_surfavg                         0.0629            0.2937
## Avg_parstriangularis_surfavg                           1            0.8744
## Avg_pericalcarine_surfavg                              1            0.9945
## Avg_postcentral_surfavg                                1            0.6086
## Avg_posteriorcingulate_surfavg                         1            0.2876
## Avg_precentral_surfavg                            0.5419            0.7704
## Avg_precuneus_surfavg                                  1            0.8976
## Avg_rostralanteriorcingulate_surfavg              0.7681            0.0092
## Avg_rostralmiddlefrontal_surfavg                  0.0046            0.0126
## Avg_superiorfrontal_surfavg                       0.3747            0.6303
## Avg_superiorparietal_surfavg                      0.8684            0.7222
## Avg_superiortemporal_surfavg                           1            0.3639
## Avg_supramarginal_surfavg                              1            0.6267
## Avg_frontalpole_surfavg                           0.4425            0.7582
## Avg_temporalpole_surfavg                          0.1882            0.5636
## Avg_transversetemporal_surfavg                    0.1033            0.0023
## Avg_insula_surfavg                                     1            0.0757
## ICV                                               0.1743            0.7507
## Avg_LatVent                                            0             2e-04
## Avg_thal                                               0                 0
## Avg_caud                                          0.4039            0.0198
## Avg_put                                                1            0.1782
## Avg_pal                                                0                 0
## Avg_hippo                                              0                 0
## Avg_amyg                                          0.0175                 0
## Avg_accumb                                        0.3101            0.0646
##                                       bonf.adj.p.value
## Avg_bankssts_thickavg                           0.0000
## Avg_caudalanteriorcingulate_thickavg            0.1078
## Avg_caudalmiddlefrontal_thickavg                0.0000
## Avg_cuneus_thickavg                             0.0000
## Avg_entorhinal_thickavg                         0.7854
## Avg_fusiform_thickavg                           0.0000
## Avg_inferiorparietal_thickavg                   0.0000
## Avg_inferiortemporal_thickavg                   0.0000
## Avg_isthmuscingulate_thickavg                   0.0000
## Avg_lateraloccipital_thickavg                   0.0000
## Avg_lateralorbitofrontal_thickavg               0.0000
## Avg_lingual_thickavg                            0.1078
## Avg_medialorbitofrontal_thickavg                0.0000
## Avg_middletemporal_thickavg                     0.0000
## Avg_parahippocampal_thickavg                    0.0000
## Avg_paracentral_thickavg                        0.0000
## Avg_parsopercularis_thickavg                    0.0000
## Avg_parsorbitalis_thickavg                      0.0000
## Avg_parstriangularis_thickavg                   0.0000
## Avg_pericalcarine_thickavg                      0.2772
## Avg_postcentral_thickavg                        0.0000
## Avg_posteriorcingulate_thickavg                 0.0000
## Avg_precentral_thickavg                         0.0000
## Avg_precuneus_thickavg                          0.0000
## Avg_rostralanteriorcingulate_thickavg           0.0000
## Avg_rostralmiddlefrontal_thickavg               0.0000
## Avg_superiorfrontal_thickavg                    0.0000
## Avg_superiorparietal_thickavg                   0.0000
## Avg_superiortemporal_thickavg                   0.0000
## Avg_supramarginal_thickavg                      0.0000
## Avg_frontalpole_thickavg                        0.0000
## Avg_temporalpole_thickavg                       0.0000
## Avg_transversetemporal_thickavg                 0.0154
## Avg_insula_thickavg                             0.0000
## Avg_bankssts_surfavg                            1.0000
## Avg_caudalanteriorcingulate_surfavg             0.0000
## Avg_caudalmiddlefrontal_surfavg                 1.0000
## Avg_cuneus_surfavg                              1.0000
## Avg_entorhinal_surfavg                          1.0000
## Avg_fusiform_surfavg                            0.0000
## Avg_inferiorparietal_surfavg                    1.0000
## Avg_inferiortemporal_surfavg                    1.0000
## Avg_isthmuscingulate_surfavg                    1.0000
## Avg_lateraloccipital_surfavg                    1.0000
## Avg_lateralorbitofrontal_surfavg                1.0000
## Avg_lingual_surfavg                             1.0000
## Avg_medialorbitofrontal_surfavg                 1.0000
## Avg_middletemporal_surfavg                      1.0000
## Avg_parahippocampal_surfavg                     0.2772
## Avg_paracentral_surfavg                         1.0000
## Avg_parsopercularis_surfavg                     0.3542
## Avg_parsorbitalis_surfavg                       1.0000
## Avg_parstriangularis_surfavg                    1.0000
## Avg_pericalcarine_surfavg                       1.0000
## Avg_postcentral_surfavg                         1.0000
## Avg_posteriorcingulate_surfavg                  0.8085
## Avg_precentral_surfavg                          1.0000
## Avg_precuneus_surfavg                           1.0000
## Avg_rostralanteriorcingulate_surfavg            0.5852
## Avg_rostralmiddlefrontal_surfavg                0.3465
## Avg_superiorfrontal_surfavg                     1.0000
## Avg_superiorparietal_surfavg                    1.0000
## Avg_superiortemporal_surfavg                    1.0000
## Avg_supramarginal_surfavg                       1.0000
## Avg_frontalpole_surfavg                         1.0000
## Avg_temporalpole_surfavg                        1.0000
## Avg_transversetemporal_surfavg                  0.0077
## Avg_insula_surfavg                              1.0000
## ICV                                             1.0000
## Avg_LatVent                                     0.0000
## Avg_thal                                        0.0000
## Avg_caud                                        0.0000
## Avg_put                                         0.0077
## Avg_pal                                         0.0000
## Avg_hippo                                       0.0000
## Avg_amyg                                        0.0000
## Avg_accumb                                      0.0077
##                                       bonf.age.sex.edu.adj.p.value
## Avg_bankssts_thickavg                                       0.0000
## Avg_caudalanteriorcingulate_thickavg                        1.0000
## Avg_caudalmiddlefrontal_thickavg                            0.0000
## Avg_cuneus_thickavg                                         0.2156
## Avg_entorhinal_thickavg                                     0.6930
## Avg_fusiform_thickavg                                       0.0000
## Avg_inferiorparietal_thickavg                               0.0000
## Avg_inferiortemporal_thickavg                               0.0000
## Avg_isthmuscingulate_thickavg                               0.0000
## Avg_lateraloccipital_thickavg                               0.0000
## Avg_lateralorbitofrontal_thickavg                           0.0000
## Avg_lingual_thickavg                                        1.0000
## Avg_medialorbitofrontal_thickavg                            0.0000
## Avg_middletemporal_thickavg                                 0.0000
## Avg_parahippocampal_thickavg                                0.1001
## Avg_paracentral_thickavg                                    0.0000
## Avg_parsopercularis_thickavg                                0.0000
## Avg_parsorbitalis_thickavg                                  0.0000
## Avg_parstriangularis_thickavg                               0.0000
## Avg_pericalcarine_thickavg                                  1.0000
## Avg_postcentral_thickavg                                    0.0000
## Avg_posteriorcingulate_thickavg                             0.0000
## Avg_precentral_thickavg                                     0.0000
## Avg_precuneus_thickavg                                      0.0000
## Avg_rostralanteriorcingulate_thickavg                       0.0000
## Avg_rostralmiddlefrontal_thickavg                           0.0000
## Avg_superiorfrontal_thickavg                                0.0000
## Avg_superiorparietal_thickavg                               0.0000
## Avg_superiortemporal_thickavg                               0.0000
## Avg_supramarginal_thickavg                                  0.0000
## Avg_frontalpole_thickavg                                    0.0000
## Avg_temporalpole_thickavg                                   0.0000
## Avg_transversetemporal_thickavg                             1.0000
## Avg_insula_thickavg                                         0.0000
## Avg_bankssts_surfavg                                        1.0000
## Avg_caudalanteriorcingulate_surfavg                         0.0000
## Avg_caudalmiddlefrontal_surfavg                             1.0000
## Avg_cuneus_surfavg                                          1.0000
## Avg_entorhinal_surfavg                                      1.0000
## Avg_fusiform_surfavg                                        0.0847
## Avg_inferiorparietal_surfavg                                1.0000
## Avg_inferiortemporal_surfavg                                1.0000
## Avg_isthmuscingulate_surfavg                                1.0000
## Avg_lateraloccipital_surfavg                                1.0000
## Avg_lateralorbitofrontal_surfavg                            1.0000
## Avg_lingual_surfavg                                         1.0000
## Avg_medialorbitofrontal_surfavg                             1.0000
## Avg_middletemporal_surfavg                                  1.0000
## Avg_parahippocampal_surfavg                                 1.0000
## Avg_paracentral_surfavg                                     1.0000
## Avg_parsopercularis_surfavg                                 1.0000
## Avg_parsorbitalis_surfavg                                   1.0000
## Avg_parstriangularis_surfavg                                1.0000
## Avg_pericalcarine_surfavg                                   1.0000
## Avg_postcentral_surfavg                                     1.0000
## Avg_posteriorcingulate_surfavg                              1.0000
## Avg_precentral_surfavg                                      1.0000
## Avg_precuneus_surfavg                                       1.0000
## Avg_rostralanteriorcingulate_surfavg                        0.7084
## Avg_rostralmiddlefrontal_surfavg                            0.9702
## Avg_superiorfrontal_surfavg                                 1.0000
## Avg_superiorparietal_surfavg                                1.0000
## Avg_superiortemporal_surfavg                                1.0000
## Avg_supramarginal_surfavg                                   1.0000
## Avg_frontalpole_surfavg                                     1.0000
## Avg_temporalpole_surfavg                                    1.0000
## Avg_transversetemporal_surfavg                              0.1771
## Avg_insula_surfavg                                          1.0000
## ICV                                                         1.0000
## Avg_LatVent                                                 0.0154
## Avg_thal                                                    0.0000
## Avg_caud                                                    1.0000
## Avg_put                                                     1.0000
## Avg_pal                                                     0.0000
## Avg_hippo                                                   0.0000
## Avg_amyg                                                    0.0000
## Avg_accumb                                                  1.0000


Ridge (Nested CV)


library(caret)
## 필요한 패키지를 로딩중입니다: lattice
library(parallel)

Nfolds <- 10

f <- as.formula(paste("Age~",paste(colnames(Train_HC_data)[5:81],collapse = "+"),sep=""))
fitcontrol <- trainControl(method = "repeatedcv", number=Nfolds, search = "random")

ridge.fun <- function(N,i){
  Nfolds <- N
  set.seed(i)
  outer_folds <- createFolds(Train_HC_data$Age,k=Nfolds)
  cv.glmnet <- lapply(outer_folds,function(index) {
    tr <- Train_HC_data[-index,]
    ts <- Train_HC_data[index,]
    fit <- train(f,data=tr,method="ridge",metric="MAE",
               trControl=fitcontrol)
    c(postResample(predict(fit,ts),ts$Age),lambda=fit$bestTune$lambda)
    })
}

Ntimes <- 10
RES <- NULL
for(i in 1:Ntimes){
  res <- mclapply(10,i,FUN=ridge.fun,mc.cores = 6)
  RES <- rbind(RES,t(data.frame(res)))
}
 
library(glmnet)
## 필요한 패키지를 로딩중입니다: Matrix
## Loaded glmnet 4.1-6
ridge.fit <- glmnet(as.matrix(Train_HC_data[,c(5:81)]),Train_HC_data[,2],
                    alpha=0,lambda = RES[,4][which.min(RES[,3])])




# Train Control

ctrl_pred_age <- predict(ridge.fit, newx = as.matrix(Train_HC_data[,5:81]))
train_error_ctrl_rid <- ctrl_pred_age-Train_HC_data$Age

par(mfrow=c(1,1))
plot(ctrl_pred_age,Train_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Train control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Train_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Train_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(train_error_ctrl_rid)/length(train_error_ctrl_rid)),4)),
                paste("VEcv=",round(1-sum(train_error_ctrl_rid^2/(var(Train_HC_data$Age)*(length(Train_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Test Control

ctrl_pred_age <- predict(ridge.fit, newx = as.matrix(Test_HC_data[,5:81]))
test_error_ctrl_rid <- ctrl_pred_age-Test_HC_data$Age

plot(ctrl_pred_age,Test_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Test control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Test_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Test_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(test_error_ctrl_rid)/length(test_error_ctrl_rid)),4)),
                paste("VEcv=",round(1-sum(test_error_ctrl_rid^2/(var(Test_HC_data$Age)*(length(Test_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Patient

pat_pred_age <- predict(ridge.fit, newx = as.matrix(Test_Patient_dat[,5:81]))
test_error_pat_rid <- pat_pred_age-Test_Patient_dat$Age

plot(pat_pred_age,Test_Patient_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Patient)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_Patient_dat$Age,pat_pred_age),4)),
                          paste("R2=",round(cor(Test_Patient_dat$Age,pat_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_pat_rid)/length(test_error_pat_rid)),4)),
paste("VEcv=",round(1-sum(test_error_pat_rid^2/(var(Test_Patient_dat$Age)*(length(Test_Patient_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - Schizo
sch_pred_age <- predict(ridge.fit, newx = as.matrix(Test_schizophrenia_dat[,5:81]))
test_error_sch_rid <- sch_pred_age-Test_schizophrenia_dat$Age

plot(sch_pred_age,Test_schizophrenia_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Schizophrenia)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age),4)),
                          paste("R2=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_sch_rid)/length(test_error_sch_rid)),4)),
paste("VEcv=",round(1-sum(test_error_sch_rid^2/(var(Test_schizophrenia_dat$Age)*(length(Test_schizophrenia_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - TRS
TRS_pred_age <- predict(ridge.fit, newx = as.matrix(Test_TRS_dat[,5:81]))
test_error_trs_rid <- TRS_pred_age-Test_TRS_dat$Age

plot(TRS_pred_age,Test_TRS_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (TRS)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_TRS_dat$Age,TRS_pred_age),4)),
                          paste("R2=",round(cor(Test_TRS_dat$Age,TRS_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_trs_rid)/length(test_error_trs_rid)),4)),
paste("VEcv=",round(1-sum(test_error_trs_rid^2/(var(Test_TRS_dat$Age)*(length(Test_TRS_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - FESSD
SSD_pred_age <- predict(ridge.fit, newx = as.matrix(Test_FESSD_dat[,5:81]))
test_error_ssd_rid <- SSD_pred_age-Test_FESSD_dat$Age

plot(SSD_pred_age,Test_FESSD_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (FE-SSD)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_FESSD_dat$Age,SSD_pred_age),4)),
                          paste("R2=",round(cor(Test_FESSD_dat$Age,SSD_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_ssd_rid)/length(test_error_ssd_rid)),4)),
paste("VEcv=",round(1-sum(test_error_ssd_rid^2/(var(Test_FESSD_dat$Age)*(length(Test_FESSD_dat$Age)-1))),4)*100,"%")),bty="n")

Brain PAD correction-plot

#### Brain PAD correction-HC
pred_age <- predict(ridge.fit,newx=as.matrix(Test_HC_data[,5:81]))
test_error_rid <- pred_age-Test_HC_data$Age
PAD_rid <- test_error_rid
cor.test(PAD_rid,Test_HC_data$Age)$p.value
## [1] 6.500326e-17
png("1_rigde_corr.png", width = 800, height = 600)
par(mfrow=c(2,2), oma=c(1,3,1,1))

plot(Test_HC_data$Age,PAD_rid,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_rid~Test_HC_data$Age))
legend("topright",legend=paste("r=",round(cor(PAD_rid,Test_HC_data$Age),4),"(p<0.05)",sep=""))
mtext("A", side=3, line=-3, cex=2, adj = -0.0, outer=TRUE)


Model_linear <- lm(PAD_rid~Test_HC_data$Age+Test_HC_data$Sex)
NullModel <- lm(PAD_rid~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ Test_HC_data$Age + Test_HC_data$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 16814                           
## 2    206 11986  2    4827.5 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_HC_data$Age2 <- Test_HC_data$Age^2

Model_quad <- lm(PAD_rid~Test_HC_data$Sex+Test_HC_data$Age+Test_HC_data$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 16814                           
## 2    205 11924  3    4889.2 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ Test_HC_data$Age + Test_HC_data$Sex
## Model 2: PAD_rid ~ Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    206 11986                      
## 2    205 11924  1    61.661   0.3032
cPAD_rid <- lm(PAD_rid~+Test_HC_data$Sex+Test_HC_data$Age)$residuals

plot(Test_HC_data$Age,cPAD_rid,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_rid~Test_HC_data$Age))
abline(h=0,lty=2,col="red")
mtext("B", side=3, line=-3, cex=2, adj = 0.5, outer=TRUE)


#### Brain PAD correction-Patient
pred_age <- predict(ridge.fit,newx=as.matrix(Test_Patient_dat[,5:81]))
test_error_rid <- pred_age-Test_Patient_dat$Age
PAD_rid <- test_error_rid

plot(Test_Patient_dat$Age,PAD_rid,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patient)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_rid~Test_Patient_dat$Age))
legend("topright",legend=paste("r=",round(cor(PAD_rid,Test_Patient_dat$Age),4),"(p<0.05)",sep=""))
cor.test(PAD_rid,Test_Patient_dat$Age)$p.value
## [1] 1.140602e-13
mtext("C", side=3, line=-27, cex=2, adj = -0.0, outer=TRUE)


Model_linear <- lm(PAD_rid~Test_Patient_dat$Age+Test_Patient_dat$Sex)
NullModel <- lm(PAD_rid~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 30996                           
## 2    289 25621  2    5374.6 6.847e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_Patient_dat$Age2 <- Test_Patient_dat$Age^2

Model_quad <- lm(PAD_rid~Test_Patient_dat$Sex+Test_Patient_dat$Age+Test_Patient_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 30996                           
## 2    288 25175  3    5820.6 2.296e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
## Model 2: PAD_rid ~ Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)  
## 1    289 25621                        
## 2    288 25175  1    445.92  0.02391 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cPAD_rid <- lm(PAD_rid~+Test_Patient_dat$Sex+Test_Patient_dat$Age+Test_Patient_dat$Age2)$residuals

plot(Test_Patient_dat$Age,cPAD_rid,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patient)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_rid~Test_Patient_dat$Age))
abline(h=0,lty=2,col="red")
mtext("D", side=3, line=-27, cex=2, adj = 0.5, outer=TRUE)

dev.off()
## png 
##   2

Brain PAD correction -table

test_dat <- rbind(Test_HC_data,Test_Patient_dat)
test_dat$Dx <- c(rep(0,nrow(Test_HC_data)),rep(1,nrow(Test_Patient_dat)))
test_dat$Dx <- factor(test_dat$Dx)

pred_age <- predict(ridge.fit,newx=as.matrix(test_dat[,5:81]))
test_error_rid <- pred_age-test_dat$Age
PAD_rid <- test_error_rid
mean(PAD_rid) ; sd(PAD_rid) ; median(PAD_rid)
## [1] 2.029919
## [1] 10.30311
## [1] 2.275126
Model_linear <- lm(PAD_rid~test_dat$Age+test_dat$Sex)
NullModel <- lm(PAD_rid~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ test_dat$Age + test_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 53077                           
## 2    498 42310  2     10768 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test_dat$Age2 <- test_dat$Age^2

Model_quad <- lm(PAD_rid~test_dat$Sex+test_dat$Age+test_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 53077                           
## 2    497 41623  3     11454 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Reject H0 
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ test_dat$Age + test_dat$Sex
## Model 2: PAD_rid ~ test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)   
## 1    498 42310                         
## 2    497 41623  1     686.5 0.004196 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cPAD_rid <- lm(PAD_rid~+test_dat$Sex+test_dat$Age+test_dat$Age2)$residuals
mean(cPAD_rid) ; sd(cPAD_rid) ; median(cPAD_rid)
## [1] 6.336415e-18
## [1] 9.123928
## [1] 0.008692962
library(plotrix)
dat <- data.frame(SubjID=test_dat$SubjID,Brain_PAD = c(PAD_rid),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

library(dplyr)
palette <- c("FE-SSD"="darkgreen","TRS"="red","Control"="grey","Schizophrenia"="blue","Patient"="orange")

dat %>% dplyr::select(Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
##   Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## # A tibble: 2 × 4
##   Status   mean    sd std.error
##   <chr>   <dbl> <dbl>     <dbl>
## 1 Control -1.80  8.99     0.622
## 2 Patient  4.77 10.3      0.604
dat %>% dplyr::select(Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch     mean    sd std.error
##   <chr>         <dbl> <dbl>     <dbl>
## 1 Control       -1.80  8.99     0.622
## 2 Schizophrenia  4.57 10.5      0.748
## 3 <NA>           5.18 10.0      1.03
dat %>% dplyr::select(Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -1.80  8.99     0.622
## 2 TRS         6.93  7.61     1.12 
## 3 <NA>        4.37 10.7      0.683
dat %>% dplyr::select(Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -1.80  8.99     0.622
## 2 FE-SSD      5.36 10.7      1.26 
## 3 <NA>        4.58 10.2      0.689
library(ggplot2)
ggplot(dat[!is.na(dat$status_sch),],aes(x = Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_ssd),],aes(x = Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_trs),],aes(x = Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

summary(lm(Brain_PAD~Status+test_dat$Sex+test_dat$Age+test_dat$Age2,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ Status + test_dat$Sex + test_dat$Age + 
##     test_dat$Age2, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.612  -5.405   0.288   5.747  28.796 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.159496   3.705892   0.583   0.5603    
## StatusPatient  6.100653   0.793894   7.684 8.29e-14 ***
## test_dat$Sex2  0.069167   0.804354   0.086   0.9315    
## test_dat$Age   0.144811   0.200348   0.723   0.4701    
## test_dat$Age2 -0.005796   0.002450  -2.366   0.0184 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.66 on 496 degrees of freedom
## Multiple R-squared:  0.2992, Adjusted R-squared:  0.2936 
## F-statistic: 52.95 on 4 and 496 DF,  p-value: < 2.2e-16
summary(lm(Brain_PAD~status_sch+test_dat$Sex+test_dat$Age+test_dat$Age2,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ status_sch + test_dat$Sex + test_dat$Age + 
##     test_dat$Age2, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.067  -5.375   0.239   5.814  23.571 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.523783   4.220599   0.598   0.5502    
## status_schSchizophrenia  6.609838   0.893827   7.395 8.37e-13 ***
## test_dat$Sex2           -0.014003   0.902186  -0.016   0.9876    
## test_dat$Age             0.114771   0.226572   0.507   0.6127    
## test_dat$Age2           -0.005294   0.002735  -1.936   0.0536 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.664 on 400 degrees of freedom
##   (결측으로 인하여 96개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.2905, Adjusted R-squared:  0.2834 
## F-statistic: 40.94 on 4 and 400 DF,  p-value: < 2.2e-16
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##     SubjID Brain_PAD  Status status_ssd status_trs    status_sch
## 474    T19 25.156462 Patient     FE-SSD        TRS Schizophrenia
## 478    T24  4.526292 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
table(dat_for_regression$status_sch)
## 
##       Control Schizophrenia 
##           209           198
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(lm(Brain_PAD~dat_for_regression$status_ssd+test_dat_for_regression$Sex+
             test_dat_for_regression$Age+
             test_dat_for_regression$Age2,data=dat_for_regression))
## 
## Call:
## lm(formula = Brain_PAD ~ dat_for_regression$status_ssd + test_dat_for_regression$Sex + 
##     test_dat_for_regression$Age + test_dat_for_regression$Age2, 
##     data = dat_for_regression)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.9869  -5.2910  -0.4222   5.6978  24.5246 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          6.876640   4.557268   1.509    0.132    
## dat_for_regression$status_ssdFE-SSD  4.374098   1.104000   3.962 9.16e-05 ***
## dat_for_regression$status_ssdTRS    10.215663   1.369711   7.458 8.26e-13 ***
## test_dat_for_regression$Sex2         0.319701   0.914728   0.350    0.727    
## test_dat_for_regression$Age         -0.085809   0.250401  -0.343    0.732    
## test_dat_for_regression$Age2        -0.003462   0.003032  -1.142    0.254    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.771 on 321 degrees of freedom
##   (결측으로 인하여 176개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.3965, Adjusted R-squared:  0.3871 
## F-statistic: 42.19 on 5 and 321 DF,  p-value: < 2.2e-16
# ANOVA results (Control vs FE-SSD vs TRS)
summary(res_aov <- aov(dat_for_regression$Brain_PAD ~ dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value   Pr(>F)    
## dat_for_regression$status_ssd   2   4625  2312.7   27.25 1.15e-11 ***
## Residuals                     324  27500    84.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##             Control    FE-SSD
## FE-SSD 4.347213e-08        NA
## TRS    4.242295e-08 0.3661718
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##             Control FE-SSD
## FE-SSD 8.694426e-08     NA
## TRS    4.242295e-08      1
# t.test results (Control vs Schizo)
t.test(dat$Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = -6.5529, df = 385.19, p-value = 1.813e-10
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -8.287568 -4.462116
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -1.802921                    4.571922
t.test(dat$Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = -6.5849, df = 403, p-value = 1.423e-10
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -8.278004 -4.471680
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -1.802921                    4.571922
#### corrected_Brain_PAD ####
dat <- data.frame(SubjID=test_dat$SubjID,corrected_Brain_PAD = c(cPAD_rid),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat %>% dplyr::select(corrected_Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 2 × 4
##   Status   mean    sd std.error
##   <chr>   <dbl> <dbl>     <dbl>
## 1 Control -3.47  7.60     0.525
## 2 Patient  2.49  9.32     0.546
dat %>% dplyr::select(corrected_Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch     mean    sd std.error
##   <chr>         <dbl> <dbl>     <dbl>
## 1 Control       -3.47  7.60     0.525
## 2 Schizophrenia  2.92  9.64     0.689
## 3 <NA>           1.59  8.61     0.879
dat %>% dplyr::select(corrected_Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -3.47  7.60     0.525
## 2 TRS         5.79  7.27     1.07 
## 3 <NA>        1.87  9.54     0.609
dat %>% dplyr::select(corrected_Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -3.47  7.60     0.525
## 2 FE-SSD      1.38  8.57     1.01 
## 3 <NA>        2.85  9.55     0.644
library(ggplot2)
ggplot(dat,aes(x = corrected_Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette[c(3,5)]) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw()

A <- ggplot(dat[!is.na(dat$status_sch),],aes(x = corrected_Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette[c(3,4)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

B <- ggplot(dat[!is.na(dat$status_ssd),],aes(x = corrected_Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette[c(3,1)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

C <- ggplot(dat[!is.na(dat$status_trs),],aes(x = corrected_Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette[c(3,2)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

png("S5-2(ridge).png", width = 1400, height = 300)
ggarrange(A,B,C,
          labels = c("A","B","C"), 
          ncol = 3, nrow = 1,
          font.label = list(size=20))
dev.off()
## png 
##   2
# ANOVA results (Control vs FE-SSD vs TRS)
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##      SubjID corrected_Brain_PAD  Status status_ssd status_trs    status_sch
## 1040    T19          19.5692669 Patient     FE-SSD        TRS Schizophrenia
## 1044    T24          -0.5369975 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(res_aov <- aov(dat_for_regression$corrected_Brain_PAD~
                         dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value   Pr(>F)    
## dat_for_regression$status_ssd   2   3802  1900.8   31.43 3.35e-13 ***
## Residuals                     324  19596    60.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##             Control      FE-SSD
## FE-SSD 1.068760e-05          NA
## TRS    6.223145e-12 0.002865914
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##             Control      FE-SSD
## FE-SSD 2.137519e-05          NA
## TRS    6.223145e-12 0.008597741
# t.test results (Control vs Schizo)
t.test(dat$corrected_Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -7.3834, df = 370.38, p-value = 1.028e-12
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -8.100374 -4.693147
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -3.473009                    2.923751
t.test(dat$corrected_Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -7.4393, df = 403, p-value = 6.16e-13
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -8.087132 -4.706389
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -3.473009                    2.923751
summary(res_aov <- aov(dat$corrected_Brain_PAD~dat$status_sch))
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## dat$status_sch   1   4139    4139   55.34 6.16e-13 ***
## Residuals      403  30138      75                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 96개의 관측치가 삭제되었습니다.


SVR[RBF] (Nested CV)


library(kernlab)
## 
## 다음의 패키지를 부착합니다: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
start.time <- Sys.time()

Nfolds <- 10

svm.fun <- function(N,i){
  Nfolds <- N
  set.seed(i)
  outer_folds <- createFolds(Train_HC_data$Age,k=Nfolds)
  cv.svm <- lapply(outer_folds,function(index) {
    tr <- Train_HC_data[-outer_folds$Fold01,]
    ts <- Train_HC_data[outer_folds$Fold01,]
    svm.fit <- ksvm(as.matrix(tr[, c(5:81)]),
               tr[,2],
               kpar="automatic",
               kernel="rbfdot",cross=Nfolds) 
    
    c(svm.fit@cross,svm.fit@kernelf@kpar[["sigma"]])
    })
}

Ntimes <- 10
RES <- NULL
for(i in 1:Ntimes){
  res <- mclapply(10,i,FUN=svm.fun,mc.cores = 6)
  RES <- rbind(RES,t(data.frame(res)))
}
 
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
## Time difference of 29.16507 secs
svm.fit.77 <- ksvm(as.matrix(Train_HC_data[,c(5:81)]),Train_HC_data[,2],
                  kpar=list(sigma=RES[,2][which.min(RES[,1])]),
                  kernel="rbfdot")
svm.fit.77
## Support Vector Machine object of class "ksvm" 
## 
## SV type: eps-svr  (regression) 
##  parameter : epsilon = 0.1  cost C = 1 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.00784069303007973 
## 
## Number of Support Vectors : 442 
## 
## Objective Function Value : -146.8627 
## Training error : 0.147794
# Train Control

ctrl_pred_age <- predict(svm.fit.77 ,as.matrix(Train_HC_data[,5:81]))
train_error_ctrl_svm <- ctrl_pred_age-Train_HC_data$Age

par(mfrow=c(1,1))
plot(ctrl_pred_age,Train_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Train control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Train_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Train_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(train_error_ctrl_svm)/length(train_error_ctrl_svm)),4)),
                paste("VEcv=",round(1-sum(train_error_ctrl_svm^2/(var(Train_HC_data$Age)*(length(Train_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Test Control

ctrl_pred_age <- predict(svm.fit.77 , as.matrix(Test_HC_data[,5:81]))
test_error_ctrl_svm <- ctrl_pred_age-Test_HC_data$Age

plot(ctrl_pred_age,Test_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Test control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Test_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Test_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(test_error_ctrl_svm)/length(test_error_ctrl_svm)),4)),
                paste("VEcv=",round(1-sum(test_error_ctrl_svm^2/(var(Test_HC_data$Age)*(length(Test_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Patient
pat_pred_age <- predict(svm.fit.77 ,as.matrix(Test_Patient_dat[,5:81]))
test_error_pat_svm <- pat_pred_age-Test_Patient_dat$Age

plot(pat_pred_age,Test_Patient_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Patient)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_Patient_dat$Age,pat_pred_age),4)),paste("R2=",round(cor(Test_Patient_dat$Age,pat_pred_age)^2,4)),                   paste("MAE=",round(sum(abs(test_error_pat_svm)/length(test_error_pat_svm)),4)),
                          paste("VEcv=",round(1-sum(test_error_pat_svm^2/(var(Test_Patient_dat$Age)*(length(Test_Patient_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - schizo
sch_pred_age <- predict(svm.fit.77, as.matrix(Test_schizophrenia_dat[,5:81]))
test_error_sch_svm <- sch_pred_age-Test_schizophrenia_dat$Age

plot(sch_pred_age,Test_schizophrenia_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Schizophrenia)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age),4)),
                          paste("R2=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_sch_svm)/length(test_error_sch_svm)),4)),
paste("VEcv=",round(1-sum(test_error_sch_svm^2/(var(Test_schizophrenia_dat$Age)*(length(Test_schizophrenia_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - TRS
TRS_pred_age <- predict(svm.fit.77, as.matrix(Test_TRS_dat[,5:81]))
test_error_trs_svm <- TRS_pred_age-Test_TRS_dat$Age

plot(TRS_pred_age,Test_TRS_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (TRS)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_TRS_dat$Age,TRS_pred_age),4)),
                          paste("R2=",round(cor(Test_TRS_dat$Age,TRS_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_trs_svm)/length(test_error_trs_svm)),4)),
paste("VEcv=",round(1-sum(test_error_trs_svm^2/(var(Test_TRS_dat$Age)*(length(Test_TRS_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - FESSD
SSD_pred_age <- predict(svm.fit.77, as.matrix(Test_FESSD_dat[,5:81]))
test_error_ssd_svm <- SSD_pred_age-Test_FESSD_dat$Age

plot(SSD_pred_age,Test_FESSD_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (FE-SSD)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_FESSD_dat$Age,SSD_pred_age),4)),
                          paste("R2=",round(cor(Test_FESSD_dat$Age,SSD_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_ssd_svm)/length(test_error_ssd_svm)),4)),
paste("VEcv=",round(1-sum(test_error_ssd_svm^2/(var(Test_FESSD_dat$Age)*(length(Test_FESSD_dat$Age)-1))),4)*100,"%")),bty="n")

Brain PAD correction-plot

#### Brain PAD correction-HC
pred_age <- predict(svm.fit.77, as.matrix(Test_HC_data[,5:81]))
test_error_svm <- pred_age-Test_HC_data$Age
PAD_svm <- test_error_svm

png("2_SVR_corr.png", width = 800, height = 600)
par(mfrow=c(2,2), oma=c(1,3,1,1))

plot(Test_HC_data$Age,PAD_svm,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_svm~Test_HC_data$Age))
legend("topright",legend=paste("r=",round(cor(PAD_svm,Test_HC_data$Age),4),"(p<0.05)",sep=""))
cor.test(PAD_svm,Test_HC_data$Age)$p.value
## [1] 1.468731e-24
mtext("A", side=3, line=-3, cex=2, adj = -0.0, outer=TRUE)


Model_linear <- lm(PAD_svm~Test_HC_data$Age+Test_HC_data$Sex)
NullModel <- lm(PAD_svm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ Test_HC_data$Age + Test_HC_data$Sex
##   Res.Df     RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 15037.8                           
## 2    206  8915.2  2    6122.7 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_HC_data$Age2 <- Test_HC_data$Age^2
Model_quad <- lm(PAD_svm~+Test_HC_data$Sex+Test_HC_data$Age+Test_HC_data$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ +Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df     RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 15037.8                           
## 2    205  8883.4  3    6154.5 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ Test_HC_data$Age + Test_HC_data$Sex
## Model 2: PAD_svm ~ +Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df    RSS Df Sum of Sq Pr(>Chi)
## 1    206 8915.2                      
## 2    205 8883.4  1    31.801   0.3916
cPAD_svm <- lm(PAD_svm~+Test_HC_data$Sex+Test_HC_data$Age)$residuals


plot(Test_HC_data$Age,cPAD_svm,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_svm~Test_HC_data$Age))
abline(h=0,lty=2,col="red")
mtext("B", side=3, line=-3, cex=2, adj = 0.5, outer=TRUE)


#### Brain PAD correction-Patient
pred_age <- predict(svm.fit.77, as.matrix(Test_Patient_dat[,5:81]))
test_error_svm <- pred_age-Test_Patient_dat$Age
PAD_svm <- test_error_svm


plot(Test_Patient_dat$Age,PAD_svm,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patient)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_svm~Test_Patient_dat$Age))
legend("topright",legend=paste("r=",round(cor(PAD_svm,Test_Patient_dat$Age),4),"(p<0.05)",sep=""))
cor.test(PAD_svm,Test_Patient_dat$Age)$p.value
## [1] 3.402669e-27
mtext("C", side=3, line=-27, cex=2, adj = -0.0, outer=TRUE)

Model_linear <- lm(PAD_svm~Test_Patient_dat$Age+Test_Patient_dat$Sex)
NullModel <- lm(PAD_svm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 30817                           
## 2    289 20244  2     10573 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_Patient_dat$Age2 <- Test_Patient_dat$Age^2
Model_quad <- lm(PAD_svm~+Test_Patient_dat$Sex+Test_Patient_dat$Age+Test_Patient_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ +Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 30817                           
## 2    288 18940  3     11877 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
## Model 2: PAD_svm ~ +Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    289 20244                           
## 2    288 18940  1      1304 8.468e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cPAD_svm <- lm(PAD_svm~+Test_Patient_dat$Sex+Test_Patient_dat$Age+Test_Patient_dat$Age2)$residuals

plot(Test_Patient_dat$Age,cPAD_svm,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patienit)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_svm~Test_Patient_dat$Age+Test_Patient_dat$Age2))
## Warning in abline(lm(cPAD_svm ~ Test_Patient_dat$Age + Test_Patient_dat$Age2)):
## only using the first two of 3 regression coefficients
abline(h=0,lty=2,col="red")
mtext("D", side=3, line=-27, cex=2, adj = 0.5, outer=TRUE)

dev.off()
## png 
##   2

Brain PAD correction -table

test_dat <- rbind(Test_HC_data,Test_Patient_dat)
test_dat$Dx <- c(rep(0,nrow(Test_HC_data)),rep(1,nrow(Test_Patient_dat)))
test_dat$Dx <- factor(test_dat$Dx)

pred_age <- predict(svm.fit.77 , as.matrix(test_dat[,5:81]))
test_error_svm <- pred_age-test_dat$Age
PAD_svm <- test_error_svm
mean(PAD_svm) ; sd(PAD_svm) ; median(PAD_svm)
## [1] 0.2711754
## [1] 9.874431
## [1] 0.7584126
Model_linear <- lm(PAD_svm~test_dat$Age+test_dat$Sex)
NullModel <- lm(PAD_svm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ test_dat$Age + test_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 48752                           
## 2    498 31995  2     16757 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test_dat$Age2 <- test_dat$Age^2
Model_quad <- lm(PAD_svm~+test_dat$Sex+test_dat$Age+test_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ 1
## Model 2: PAD_svm ~ +test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 48752                           
## 2    497 30889  3     17864 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_svm ~ test_dat$Age + test_dat$Sex
## Model 2: PAD_svm ~ +test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    498 31995                           
## 2    497 30889  1    1106.3 2.453e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cPAD_svm <- lm(PAD_svm~+test_dat$Sex+test_dat$Age+test_dat$Age2)$residuals
mean(cPAD_svm) ; sd(cPAD_svm) ; median(cPAD_svm)
## [1] 2.574731e-16
## [1] 7.859859
## [1] -0.3684384
library(plotrix)
dat <- data.frame(SubjID=test_dat$SubjID,Brain_PAD = c(PAD_svm),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

library(dplyr)
palette <- c("FE-SSD"="darkgreen","TRS"="red","Control"="grey","Schizophrenia"="blue","Patient"="orange")

dat %>% dplyr::select(Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 2 × 4
##   Status   mean    sd std.error
##   <chr>   <dbl> <dbl>     <dbl>
## 1 Control -2.57  8.50     0.588
## 2 Patient  2.31 10.3      0.602
dat %>% dplyr::select(Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch     mean    sd std.error
##   <chr>         <dbl> <dbl>     <dbl>
## 1 Control       -2.57  8.50     0.588
## 2 Schizophrenia  1.58 10.2      0.729
## 3 <NA>           3.78 10.4      1.06
dat %>% dplyr::select(Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -2.57  8.50     0.588
## 2 TRS         2.92  8.12     1.20 
## 3 <NA>        2.19 10.7      0.679
dat %>% dplyr::select(Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -2.57  8.50     0.588
## 2 FE-SSD      4.37 10.8      1.27 
## 3 <NA>        1.63 10.1      0.679
ggplot(dat,aes(x = Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_sch),],aes(x = Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_ssd),],aes(x = Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_trs),],aes(x = Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

summary(lm(Brain_PAD~Status+test_dat$Sex+test_dat$Age+test_dat$Age2,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ Status + test_dat$Sex + test_dat$Age + 
##     test_dat$Age2, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.7841  -4.4552  -0.4216   5.1913  27.2216 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.358076   3.242486   0.110 0.912111    
## StatusPatient  4.504713   0.694621   6.485 2.15e-10 ***
## test_dat$Sex2  2.166808   0.703772   3.079 0.002193 ** 
## test_dat$Age   0.241326   0.175295   1.377 0.169232    
## test_dat$Age2 -0.008202   0.002143  -3.827 0.000147 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.577 on 496 degrees of freedom
## Multiple R-squared:  0.4159, Adjusted R-squared:  0.4112 
## F-statistic: 88.31 on 4 and 496 DF,  p-value: < 2.2e-16
summary(lm(Brain_PAD~status_sch+test_dat$Sex+test_dat$Age+test_dat$Age2,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ status_sch + test_dat$Sex + test_dat$Age + 
##     test_dat$Age2, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.8665  -4.4313  -0.2863   5.2566  26.8055 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.904110   3.612829   0.527  0.59846    
## status_schSchizophrenia  4.832705   0.765115   6.316 7.14e-10 ***
## test_dat$Sex2            1.988370   0.772271   2.575  0.01039 *  
## test_dat$Age             0.145981   0.193945   0.753  0.45208    
## test_dat$Age2           -0.006880   0.002341  -2.939  0.00348 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.416 on 400 degrees of freedom
##   (결측으로 인하여 96개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.4071, Adjusted R-squared:  0.4011 
## F-statistic: 68.65 on 4 and 400 DF,  p-value: < 2.2e-16
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##     SubjID Brain_PAD  Status status_ssd status_trs    status_sch
## 474    T19 22.155110 Patient     FE-SSD        TRS Schizophrenia
## 478    T24  4.071636 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(lm(Brain_PAD~dat_for_regression$status_ssd+test_dat_for_regression$Sex+
             test_dat_for_regression$Age+
             test_dat_for_regression$Age2,data=dat_for_regression))
## 
## Call:
## lm(formula = Brain_PAD ~ dat_for_regression$status_ssd + test_dat_for_regression$Sex + 
##     test_dat_for_regression$Age + test_dat_for_regression$Age2, 
##     data = dat_for_regression)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.5630  -4.2999  -0.3654   4.7411  20.1858 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          6.638193   4.080436   1.627   0.1048    
## dat_for_regression$status_ssdFE-SSD  3.718865   0.988488   3.762   0.0002 ***
## dat_for_regression$status_ssdTRS     7.771752   1.226397   6.337 7.92e-10 ***
## test_dat_for_regression$Sex2         2.247930   0.819019   2.745   0.0064 ** 
## test_dat_for_regression$Age         -0.115573   0.224201  -0.515   0.6066    
## test_dat_for_regression$Age2        -0.003847   0.002714  -1.417   0.1574    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.958 on 321 degrees of freedom
##   (결측으로 인하여 176개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.4704, Adjusted R-squared:  0.4621 
## F-statistic: 57.02 on 5 and 321 DF,  p-value: < 2.2e-16
# ANOVA results (Control vs FE-SSD vs TRS)
summary(res_aov <- aov(dat_for_regression$Brain_PAD ~ dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value   Pr(>F)    
## dat_for_regression$status_ssd   2   3121  1560.3   19.28 1.23e-08 ***
## Residuals                     324  26224    80.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##             Control    FE-SSD
## FE-SSD 1.098211e-07        NA
## TRS    3.206296e-04 0.3937673
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##             Control FE-SSD
## FE-SSD 1.098211e-07     NA
## TRS    6.412593e-04      1
# t.test results (Control vs Schizo)
t.test(dat$Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = -4.4362, df = 380.36, p-value = 1.201e-05
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -5.997379 -2.313699
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -2.571412                    1.584126
t.test(dat$Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = -4.462, df = 403, p-value = 1.055e-05
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -5.986372 -2.324705
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -2.571412                    1.584126
#### corrected_Brain_PAD ####

dat <- data.frame(SubjID=test_dat$SubjID,corrected_Brain_PAD = c(cPAD_svm),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat %>% dplyr::select(corrected_Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 2 × 4
##   Status   mean    sd std.error
##   <chr>   <dbl> <dbl>     <dbl>
## 1 Control -2.56  6.68     0.462
## 2 Patient  1.84  8.13     0.476
dat %>% dplyr::select(corrected_Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch     mean    sd std.error
##   <chr>         <dbl> <dbl>     <dbl>
## 1 Control       -2.56  6.68     0.462
## 2 Schizophrenia  2.09  8.11     0.579
## 3 <NA>           1.32  8.20     0.837
dat %>% dplyr::select(corrected_Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -2.56  6.68     0.462
## 2 TRS         4.34  6.34     0.935
## 3 <NA>        1.37  8.36     0.533
dat %>% dplyr::select(corrected_Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -2.56  6.68     0.462
## 2 FE-SSD      1.41  8.12     0.957
## 3 <NA>        1.97  8.15     0.550
ggplot(dat,aes(x = corrected_Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_sch),],aes(x = corrected_Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_ssd),],aes(x = corrected_Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_trs),],aes(x = corrected_Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

library(ggplot2)
ggplot(dat,aes(x = corrected_Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette[c(3,5)]) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw()

A <- ggplot(dat[!is.na(dat$status_sch),],aes(x = corrected_Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette[c(3,4)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

B <- ggplot(dat[!is.na(dat$status_ssd),],aes(x = corrected_Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette[c(3,1)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

C <- ggplot(dat[!is.na(dat$status_trs),],aes(x = corrected_Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette[c(3,2)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

png("p2(SVR).png", width = 1400, height = 300)
ggarrange(A,B,C,
          labels = c("A","B","C"), 
          ncol = 3, nrow = 1,
          font.label = list(size=20))
dev.off()
## png 
##   2
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##     SubjID corrected_Brain_PAD  Status status_ssd status_trs    status_sch
## 474    T19           16.133972 Patient     FE-SSD        TRS Schizophrenia
## 478    T24           -1.234488 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(res_aov <- aov(dat_for_regression$corrected_Brain_PAD~
                         dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value   Pr(>F)    
## dat_for_regression$status_ssd   2   2218  1108.9   22.79 5.47e-10 ***
## Residuals                     324  15761    48.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##             Control     FE-SSD
## FE-SSD 5.810560e-05         NA
## TRS    1.006485e-08 0.02667952
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##             Control     FE-SSD
## FE-SSD 1.162112e-04         NA
## TRS    1.006485e-08 0.08003857
# t.test results (Control vs Schizo)
t.test(dat$corrected_Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -6.2772, df = 378.38, p-value = 9.455e-10
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -6.107606 -3.193984
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -2.564465                    2.086330
t.test(dat$corrected_Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -6.3161, df = 403, p-value = 7.101e-10
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -6.098352 -3.203238
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -2.564465                    2.086330
summary(res_aov <- aov(dat$corrected_Brain_PAD~dat$status_sch))
##                 Df Sum Sq Mean Sq F value  Pr(>F)    
## dat$status_sch   1   2188  2187.8   39.89 7.1e-10 ***
## Residuals      403  22101    54.8                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 96개의 관측치가 삭제되었습니다.


RVR[RBF] (Nested CV)


start.time <- Sys.time()

Nfolds <- 10

rvm.fun <- function(N,i){
  Nfolds <- N
  set.seed(i)
  outer_folds <- createFolds(Train_HC_data$Age,k=Nfolds)
  cv.rvm <- lapply(outer_folds,function(index) {
    tr <- Train_HC_data[-index,]
    ts <- Train_HC_data[index,]
    rvm.fit <- rvm(as.matrix(tr[, c(5:81)]),
               tr[,2],type="regression",
               kpar="automatic",
               kernel="rbfdot",cross=Nfolds)
    
    c(rvm.fit@cross,rvm.fit@kernelf@kpar[["sigma"]])
    })
}

Ntimes <- 10
RES <- NULL
for(i in 1:Ntimes){
  res <- mclapply(10,i,FUN=rvm.fun,mc.cores = 6)
  RES <- rbind(RES,t(data.frame(res)))
}
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel 
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
## Time difference of 29.93733 mins
rvm.fit.77 <- rvm(as.matrix(Train_HC_data[,c(5:81)]),Train_HC_data[,2],
                  kpar=list(sigma=RES[,2][which.min(RES[,1])]),
                  type="regression",kernel="rbfdot")
rvm.fit.77
## Relevance Vector Machine object of class "rvm" 
## Problem type: regression 
##  
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  4.57552737754227e-10 
## 
## Number of Relevance Vectors : 34 
## Variance :  111.9963
## Training error : 105.444590799
# Train Control
ctrl_pred_age <- predict(rvm.fit.77 ,as.matrix(Train_HC_data[,5:81]))
train_error_ctrl_rvm <- ctrl_pred_age-Train_HC_data$Age

par(mfrow=c(1,1))
plot(ctrl_pred_age,Train_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Train control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Train_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Train_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(train_error_ctrl_rid)/length(train_error_ctrl_rid)),4)),
                paste("VEcv=",round(1-sum(train_error_ctrl_rid^2/(var(Train_HC_data$Age)*(length(Train_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Test Control

ctrl_pred_age <- predict(rvm.fit.77 , as.matrix(Test_HC_data[,5:81]))
test_error_ctrl_rvm <- ctrl_pred_age-Test_HC_data$Age

plot(ctrl_pred_age,Test_HC_data$Age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Test control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(Test_HC_data$Age,ctrl_pred_age),4)),
                paste("R2=",round(cor(Test_HC_data$Age,ctrl_pred_age)^2,4)),
                paste("MAE=",round(sum(abs(test_error_ctrl_rvm)/length(test_error_ctrl_rvm)),4)),
                paste("VEcv=",round(1-sum(test_error_ctrl_rvm^2/(var(Test_HC_data$Age)*(length(Test_HC_data$Age)-1))),4)*100,"%")) ,bty="n")

# Patient

pat_pred_age <- predict(rvm.fit.77 ,as.matrix(Test_Patient_dat[,5:81]))
test_error_pat_rvm <- pat_pred_age-Test_Patient_dat$Age

plot(pat_pred_age,Test_Patient_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Patient)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_Patient_dat$Age,pat_pred_age),4)),paste("R2=",round(cor(Test_Patient_dat$Age,pat_pred_age)^2,4)),
                          paste("MAE=",round(sum(abs(test_error_pat_rid)/length(test_error_pat_rid)),4)),
                          paste("VEcv=",round(1-sum(test_error_pat_rid^2/(var(Test_Patient_dat$Age)*(length(Test_Patient_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - schizo
sch_pred_age <- predict(rvm.fit.77, as.matrix(Test_schizophrenia_dat[,5:81]))
test_error_sch_rvm <- sch_pred_age-Test_schizophrenia_dat$Age

plot(sch_pred_age,Test_schizophrenia_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Schizophrenia)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age),4)),
                          paste("R2=",round(cor(Test_schizophrenia_dat$Age,sch_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_sch_rvm)/length(test_error_sch_rvm)),4)),
paste("VEcv=",round(1-sum(test_error_sch_rvm^2/(var(Test_schizophrenia_dat$Age)*(length(Test_schizophrenia_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - TRS
TRS_pred_age <- predict(rvm.fit.77, as.matrix(Test_TRS_dat[,5:81]))
test_error_trs_rvm <- TRS_pred_age-Test_TRS_dat$Age

plot(TRS_pred_age,Test_TRS_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (TRS)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_TRS_dat$Age,TRS_pred_age),4)),
                          paste("R2=",round(cor(Test_TRS_dat$Age,TRS_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_trs_rvm)/length(test_error_trs_rvm)),4)),
paste("VEcv=",round(1-sum(test_error_trs_rvm^2/(var(Test_TRS_dat$Age)*(length(Test_TRS_dat$Age)-1))),4)*100,"%")),bty="n")

# Patient - FESSD
SSD_pred_age <- predict(rvm.fit.77, as.matrix(Test_FESSD_dat[,5:81]))
test_error_ssd_rvm <- SSD_pred_age-Test_FESSD_dat$Age

plot(SSD_pred_age,Test_FESSD_dat$Age,xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (FE-SSD)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",legend=c(paste("r=",round(cor(Test_FESSD_dat$Age,SSD_pred_age),4)),
                          paste("R2=",round(cor(Test_FESSD_dat$Age,SSD_pred_age)^2,4)),
paste("MAE=",round(sum(abs(test_error_ssd_rvm)/length(test_error_ssd_rvm)),4)),
paste("VEcv=",round(1-sum(test_error_ssd_rvm^2/(var(Test_FESSD_dat$Age)*(length(Test_FESSD_dat$Age)-1))),4)*100,"%")),bty="n")

Brain PAD correction-plot

#### Brain PAD correction-HC
pred_age <- predict(rvm.fit.77 , as.matrix(Test_HC_data[,5:81]))
test_error_rvm <- pred_age-Test_HC_data$Age
PAD_rvm <- test_error_rvm

png("3_RVR_corr.png", width = 800, height = 600)
par(mfrow=c(2,2), oma=c(1,3,1,1))

plot(Test_HC_data$Age,PAD_rvm,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_rvm~Test_HC_data$Age))
legend("topright",legend=paste("r=",round(cor(PAD_rvm,Test_HC_data$Age),4),"(p<0.05)",sep=""))
cor.test(PAD_rvm,Test_HC_data$Age)$p.value
## [1] 2.502807e-51
mtext("A", side=3, line=-3, cex=2, adj = -0.0, outer=TRUE)

Model_linear <- lm(PAD_rvm~Test_HC_data$Age+Test_HC_data$Sex)
NullModel <- lm(PAD_rvm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ Test_HC_data$Age + Test_HC_data$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 33770                           
## 2    206 10908  2     22862 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_HC_data$Age2 <- Test_HC_data$Age^2
Model_quad <- lm(PAD_rvm~Test_HC_data$Sex+Test_HC_data$Age+Test_HC_data$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    208 33770                           
## 2    205 10875  3     22895 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ Test_HC_data$Age + Test_HC_data$Sex
## Model 2: PAD_rvm ~ Test_HC_data$Sex + Test_HC_data$Age + Test_HC_data$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    206 10908                      
## 2    205 10875  1    33.085   0.4297
cPAD_rvm <- lm(PAD_rvm~Test_HC_data$Sex+Test_HC_data$Age)$residuals

plot(Test_HC_data$Age,cPAD_rvm,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test HC)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_rvm~Test_HC_data$Age))
abline(h=0,lty=2,col="red")
mtext("B", side=3, line=-3, cex=2, adj = 0.5, outer=TRUE)

#### Brain PAD correction-Patient
pred_age <- predict(rvm.fit.77 , as.matrix(Test_Patient_dat[,5:81]))
test_error_rvm <- pred_age-Test_Patient_dat$Age
PAD_rvm <- test_error_rvm

plot(Test_Patient_dat$Age,PAD_rvm,xlim=c(10,80),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patient)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_rvm~Test_Patient_dat$Age))
legend("topright",legend=paste("r=",round(cor(PAD_rvm,Test_Patient_dat$Age),4),"(p<0.05)",sep=""))
cor.test(PAD_rvm,Test_Patient_dat$Age)$p.value
## [1] 1.947254e-51
mtext("C", side=3, line=-27, cex=2, adj = -0.0, outer=TRUE)

Model_linear <- lm(PAD_rvm~Test_Patient_dat$Age+Test_Patient_dat$Sex)
NullModel <- lm(PAD_rvm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 48018                           
## 2    289 20660  2     27358 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test_Patient_dat$Age2 <- Test_Patient_dat$Age^2
Model_quad <- lm(PAD_rvm~Test_Patient_dat$Sex+Test_Patient_dat$Age+Test_Patient_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    291 48018                           
## 2    288 20652  3     27366 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ Test_Patient_dat$Age + Test_Patient_dat$Sex
## Model 2: PAD_rvm ~ Test_Patient_dat$Sex + Test_Patient_dat$Age + Test_Patient_dat$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    289 20660                      
## 2    288 20652  1    8.5016   0.7306
cPAD_rvm <- lm(PAD_rvm~Test_Patient_dat$Sex+Test_Patient_dat$Age)$residuals

plot(Test_Patient_dat$Age,cPAD_rvm,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test Patient)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_rvm~Test_Patient_dat$Age))
abline(h=0,lty=2,col="red")
mtext("D", side=3, line=-27, cex=2, adj = 0.5, outer=TRUE)

dev.off()
## png 
##   2

Brain PAD correction -table

##### Brain PAD correction 
test_dat <- rbind(Test_HC_data,Test_Patient_dat)
test_dat$Dx <- c(rep(0,nrow(Test_HC_data)),rep(1,nrow(Test_Patient_dat)))
test_dat$Dx <- factor(test_dat$Dx)

pred_age <- predict(rvm.fit.77 , as.matrix(test_dat[,5:81]))
test_error_rvm <- pred_age-test_dat$Age
PAD_rvm <- test_error_rvm
mean(PAD_rvm) ; sd(PAD_rvm) ; median(PAD_rvm)
## [1] -2.520456
## [1] 12.83012
## [1] -0.9712669
Model_linear <- lm(PAD_rvm~test_dat$Age+test_dat$Sex)
NullModel <- lm(PAD_rvm~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ test_dat$Age + test_dat$Sex
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 82306                           
## 2    498 32020  2     50286 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test_dat$Age2 <- test_dat$Age^2
Model_quad <- lm(PAD_rvm~test_dat$Sex+test_dat$Age+test_dat$Age2)
anova(NullModel,Model_quad,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ 1
## Model 2: PAD_rvm ~ test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq  Pr(>Chi)    
## 1    500 82306                           
## 2    497 31990  3     50316 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (Reduced model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rvm ~ test_dat$Age + test_dat$Sex
## Model 2: PAD_rvm ~ test_dat$Sex + test_dat$Age + test_dat$Age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    498 32020                      
## 2    497 31990  1    30.349   0.4923
cPAD_rvm <- lm(PAD_rvm~test_dat$Sex+test_dat$Age)$residuals
mean(cPAD_rvm) ; sd(cPAD_rvm) ; median(cPAD_rvm)
## [1] 3.864157e-16
## [1] 8.002527
## [1] -0.6976008
library(plotrix)
dat <- data.frame(SubjID=test_dat$SubjID,Brain_PAD = c(PAD_rvm),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

library(dplyr)
palette <- c("FE-SSD"="darkgreen","TRS"="red","Control"="grey","Schizophrenia"="blue","Patient"="orange")

dat %>% dplyr::select(Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 2 × 4
##   Status   mean    sd std.error
##   <chr>   <dbl> <dbl>     <dbl>
## 1 Control -3.72  12.7     0.881
## 2 Patient -1.66  12.8     0.752
dat %>% dplyr::select(Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch     mean    sd std.error
##   <chr>         <dbl> <dbl>     <dbl>
## 1 Control       -3.72  12.7     0.881
## 2 Schizophrenia -3.74  12.6     0.898
## 3 <NA>           2.59  12.4     1.27
dat %>% dplyr::select(Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs   mean    sd std.error
##   <chr>       <dbl> <dbl>     <dbl>
## 1 Control    -3.72   12.7     0.881
## 2 TRS        -5.62   12.2     1.79 
## 3 <NA>       -0.920  12.9     0.820
dat %>% dplyr::select(Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd  mean    sd std.error
##   <chr>      <dbl> <dbl>     <dbl>
## 1 Control    -3.72  12.7     0.881
## 2 FE-SSD      3.95  12.6     1.49 
## 3 <NA>       -3.50  12.4     0.837
ggplot(dat,aes(x = Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_sch),],aes(x = Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_ssd),],aes(x = Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_trs),],aes(x = Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

summary(lm(Brain_PAD~Status+test_dat$Sex+test_dat$Age,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ Status + test_dat$Sex + test_dat$Age, 
##     data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5958  -5.3749  -0.5848   4.3840  28.9452 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   21.73289    1.14156  19.038  < 2e-16 ***
## StatusPatient  1.80534    0.72875   2.477   0.0136 *  
## test_dat$Sex2  3.58138    0.74025   4.838 1.75e-06 ***
## test_dat$Age  -0.73605    0.02624 -28.051  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.978 on 497 degrees of freedom
## Multiple R-squared:  0.6157, Adjusted R-squared:  0.6134 
## F-statistic: 265.4 on 3 and 497 DF,  p-value: < 2.2e-16
summary(lm(Brain_PAD~status_sch+test_dat$Sex+test_dat$Age,data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ status_sch + test_dat$Sex + test_dat$Age, 
##     data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.9244  -5.5753  -0.4334   4.5686  28.7305 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             21.36217    1.23478  17.300  < 2e-16 ***
## status_schSchizophrenia  1.95278    0.80445   2.427   0.0156 *  
## test_dat$Sex2            4.02504    0.82356   4.887 1.48e-06 ***
## test_dat$Age            -0.73365    0.02921 -25.112  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.911 on 401 degrees of freedom
##   (결측으로 인하여 96개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.6115, Adjusted R-squared:  0.6086 
## F-statistic: 210.4 on 3 and 401 DF,  p-value: < 2.2e-16
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##     SubjID  Brain_PAD  Status status_ssd status_trs    status_sch
## 474    T19 12.0711481 Patient     FE-SSD        TRS Schizophrenia
## 478    T24  0.8081361 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(lm(Brain_PAD~dat_for_regression$status_ssd+test_dat_for_regression$Sex+
             test_dat_for_regression$Age,data=dat_for_regression))
## 
## Call:
## lm(formula = Brain_PAD ~ dat_for_regression$status_ssd + test_dat_for_regression$Sex + 
##     test_dat_for_regression$Age, data = dat_for_regression)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.9597  -4.9915  -0.6447   4.1766  29.4979 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         22.70813    1.31493  17.270  < 2e-16 ***
## dat_for_regression$status_ssdFE-SSD  1.61541    1.06556   1.516    0.130    
## dat_for_regression$status_ssdTRS     2.89400    1.27588   2.268    0.024 *  
## test_dat_for_regression$Sex2         3.59280    0.88869   4.043 6.61e-05 ***
## test_dat_for_regression$Age         -0.76214    0.03172 -24.031  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.577 on 322 degrees of freedom
##   (결측으로 인하여 176개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.6667, Adjusted R-squared:  0.6625 
## F-statistic:   161 on 4 and 322 DF,  p-value: < 2.2e-16
# ANOVA results (Control vs FE-SSD vs TRS)
summary(res_aov <- aov(dat_for_regression$Brain_PAD ~ dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value   Pr(>F)    
## dat_for_regression$status_ssd   2   3744  1871.8   11.73 1.21e-05 ***
## Residuals                     324  51708   159.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##             Control       FE-SSD
## FE-SSD 3.631959e-05           NA
## TRS    3.563179e-01 0.0001109805
pairwise.t.test(dat_for_regression$Brain_PAD,dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##             Control      FE-SSD
## FE-SSD 3.631959e-05          NA
## TRS    1.000000e+00 0.000221961
# t.test results (Control vs Schizo)
t.test(dat$Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = 0.015384, df = 401.95, p-value = 0.9877
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -2.454454  2.493172
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -3.721538                   -3.740897
t.test(dat$Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = 0.015378, df = 403, p-value = 0.9877
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -2.455490  2.494208
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -3.721538                   -3.740897
#### corrected_Brain_PAD ####

dat <- data.frame(SubjID=test_dat$SubjID,corrected_Brain_PAD = c(cPAD_rvm),Status=rep(c("Control","Patient"),c(nrow(Test_HC_data),nrow(Test_Patient_dat))))

dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))
dat$status_sch[test_dat$SubjID %in% Test_HC_data$SubjID ] <- "Control"   
dat$status_sch[test_dat$SubjID %in% Test_schizophrenia_dat$SubjID ] <- "Schizophrenia"
dat$status_trs[test_dat$SubjID %in% Test_TRS_dat$SubjID ] <- "TRS"
dat$status_trs[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat$status_ssd[test_dat$SubjID %in% Test_FESSD_dat$SubjID ] <- "FE-SSD"
dat$status_ssd[test_dat$SubjID %in% Test_HC_data$SubjID  ] <- "Control"

dat %>% dplyr::select(corrected_Brain_PAD,Status) %>% group_by(Status) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 2 × 4
##   Status    mean    sd std.error
##   <chr>    <dbl> <dbl>     <dbl>
## 1 Control -1.04   7.25     0.501
## 2 Patient  0.741  8.44     0.494
dat %>% dplyr::select(corrected_Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_sch      mean    sd std.error
##   <chr>          <dbl> <dbl>     <dbl>
## 1 Control       -1.04   7.25     0.501
## 2 Schizophrenia  0.800  8.54     0.610
## 3 <NA>           0.619  8.27     0.844
dat %>% dplyr::select(corrected_Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_trs   mean    sd std.error
##   <chr>       <dbl> <dbl>     <dbl>
## 1 Control    -1.04   7.25     0.501
## 2 TRS         1.66   8.78     1.29 
## 3 <NA>        0.568  8.38     0.534
dat %>% dplyr::select(corrected_Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd,std.error))
## # A tibble: 3 × 4
##   status_ssd   mean    sd std.error
##   <chr>       <dbl> <dbl>     <dbl>
## 1 Control    -1.04   7.25     0.501
## 2 FE-SSD      0.781  7.62     0.899
## 3 <NA>        0.728  8.70     0.587
ggplot(dat,aes(x = corrected_Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_sch),],aes(x = corrected_Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_ssd),],aes(x = corrected_Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

ggplot(dat[!is.na(dat$status_trs),],aes(x = corrected_Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

library(ggplot2)
ggplot(dat,aes(x = corrected_Brain_PAD, fill = Status)) + geom_density(alpha = 0.5) + scale_fill_manual(values=palette[c(3,5)]) +
  coord_cartesian(xlim =c(-30,30)) + theme_bw()

A <- ggplot(dat[!is.na(dat$status_sch),],aes(x = corrected_Brain_PAD, fill = status_sch)) + scale_fill_manual(values=palette[c(3,4)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

B <- ggplot(dat[!is.na(dat$status_ssd),],aes(x = corrected_Brain_PAD, fill = status_ssd)) + scale_fill_manual(values=palette[c(3,1)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

C <- ggplot(dat[!is.na(dat$status_trs),],aes(x = corrected_Brain_PAD, fill = status_trs)) +scale_fill_manual(values=palette[c(3,2)]) + geom_density(alpha = 0.5) + coord_cartesian(xlim =c(-30,30)) + theme_bw() + guides(fill=guide_legend(title=NULL))

library(ggpubr)
png("S6_2(RVR).png", width = 1400, height = 300)
ggarrange(A,B,C,
          labels = c("A","B","C"), 
          ncol = 3, nrow = 1,
          font.label = list(size=20))
dev.off()
## png 
##   2
# ANOVA results (Control vs FE-SSD vs TRS)
dat_for_regression <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:214)
dat[replicated,]
##     SubjID corrected_Brain_PAD  Status status_ssd status_trs    status_sch
## 474    T19            2.726892 Patient     FE-SSD        TRS Schizophrenia
## 478    T24           -4.853495 Patient     FE-SSD        TRS Schizophrenia
dat_for_regression <- rbind(dat_for_regression,dat[replicated,])
dat_for_regression[replicated,]$status_ssd <- NA
dat_for_regression[(nrow(dat_for_regression)-1):nrow(dat_for_regression),]$status_trs <- NA

table(dat_for_regression$status_trs)
## 
## Control     TRS 
##     209      46
table(dat_for_regression$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat_for_regression <- test_dat
test_dat_for_regression <- rbind(test_dat_for_regression,test_dat[replicated,])

dat_for_regression$status_ssd[is.na(dat_for_regression$status_ssd)] = dat_for_regression$status_trs[is.na(dat_for_regression$status_ssd)]

summary(res_aov <- aov(dat_for_regression$corrected_Brain_PAD~
                         dat_for_regression$status_ssd))
##                                Df Sum Sq Mean Sq F value Pr(>F)  
## dat_for_regression$status_ssd   2    374  187.05   3.272 0.0392 *
## Residuals                     324  18523   57.17                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 176개의 관측치가 삭제되었습니다.
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "fdr")$p.value
##           Control    FE-SSD
## FE-SSD 0.11948300        NA
## TRS    0.08713419 0.5363722
pairwise.t.test(dat_for_regression$corrected_Brain_PAD,
               dat_for_regression$status_ssd, p.adj = "bonferroni")$p.value
##           Control FE-SSD
## FE-SSD 0.23896600     NA
## TRS    0.08713419      1
# t.test results (Control vs Schizo)
t.test(dat$corrected_Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -2.3249, df = 383.46, p-value = 0.0206
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -3.3878440 -0.2831972
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                  -1.0351309                   0.8003897
t.test(dat$corrected_Brain_PAD~dat$status_sch, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  dat$corrected_Brain_PAD by dat$status_sch
## t = -2.337, df = 403, p-value = 0.01993
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -3.3795224 -0.2915189
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                  -1.0351309                   0.8003897
summary(res_aov <- aov(dat$corrected_Brain_PAD~dat$status_sch))
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## dat$status_sch   1    341   340.8   5.462 0.0199 *
## Residuals      403  25144    62.4                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 96개의 관측치가 삭제되었습니다.


SFCN result

train_error_ctrl_rid <- TRS$Pred_age - TRS$Real_age
par(mfrow=c(1,1))
plot(TRS$Pred_age,TRS$Real_age,
     xlim=c(10,80),ylim=c(10,80),ylab="Chronological age (Train control data)",xlab="Predicted brain age",pch=16)
abline(0,1)
legend("topleft",
       legend=c(paste("r=",round(cor(TRS$Real_age,TRS$Pred_age),4)),
                paste("R2=",round(cor(TRS$Real_age,TRS$Pred_age)^2,4)),
                paste("MAE=",round(sum(abs(train_error_ctrl_rid)/length(train_error_ctrl_rid)),4)),
                paste("VEcv=",round(1-sum(train_error_ctrl_rid^2/(var(TRS$Real_age)*(length(TRS$Real_age)-1))),4)*100,"%")) ,bty="n")

Patient <- rbind(SCZ,SSD,TRS)
Patient <- Patient[!duplicated(Patient),]

test_dat <- rbind(HC_te,Patient) #209+233=442

test_error_rid <- test_dat$Pred_age-test_dat$Real_age
PAD_rid <- test_error_rid
mean(PAD_rid) ; sd(PAD_rid) ; median(PAD_rid)
## [1] -2.232437
## [1] 7.601841
## [1] -2.096713
table(test_dat$Real_age)
## 
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 
## 10  8 25 13 19 15 18 24 28 15 14 13  7  3  9  9  2  8  8  3  3  5  8  3  7  3 
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 69 70 72 
## 10 13  6 13  8  8  6  8  7  8  6 11 17  7  8  3  4  2  3  3  4  1  2  2  1  1
plot(test_dat$Real_age,PAD_rid,xlim=c(15,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test)",ylab="Brain PAD uncorrected")
abline(h=0,lty=2,col="red")
abline(lm(PAD_rid~test_dat$Real_age))
legend("topright",legend=paste("r=",round(cor(PAD_rid,test_dat$Real_age),4),"(p<0.05)"))

Model_linear <- lm(PAD_rid ~ test_dat$Sex + test_dat$Real_age)
NullModel <- lm(PAD_rid~1)
anova(NullModel,Model_linear,test="Chisq") # Reject H0 (Full model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ test_dat$Sex + test_dat$Real_age
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)  
## 1    441 25484                        
## 2    439 25208  2    276.72  0.08985 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test_dat$age2 <- test_dat$Real_age^2

Model_quad <- lm(PAD_rid ~ test_dat$Sex + test_dat$Real_age + test_dat$age2)
anova(NullModel,Model_quad,test="Chisq") # Not Reject H0 (reduce model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ 1
## Model 2: PAD_rid ~ test_dat$Sex + test_dat$Real_age + test_dat$age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    441 25484                      
## 2    438 25132  3    352.75   0.1046
anova(Model_linear,Model_quad,test="Chisq") # Not Reject H0 (reduce model is better)
## Analysis of Variance Table
## 
## Model 1: PAD_rid ~ test_dat$Sex + test_dat$Real_age
## Model 2: PAD_rid ~ test_dat$Sex + test_dat$Real_age + test_dat$age2
##   Res.Df   RSS Df Sum of Sq Pr(>Chi)
## 1    439 25208                      
## 2    438 25132  1    76.032   0.2497
cPAD_rid <- lm(PAD_rid~+test_dat$Sex+test_dat$Real_age)$residuals
mean(cPAD_rid) ; sd(cPAD_rid) ; median(cPAD_rid)
## [1] 3.532704e-16
## [1] 7.560456
## [1] -0.132366
plot(test_dat$Real_age,cPAD_rid,xlim=c(10,75),ylim=c(-40,40),pch=16,
     xlab="Chronological age (Test)",ylab="Brain PAD corrected for age")
abline(lm(cPAD_rid~test_dat$Real_age))

dat <- data.frame(ID=test_dat$Data_ID, Brain_PAD = c(PAD_rid), c_Brain_PAD = c(cPAD_rid), 
                  Status=rep(c("Control","Patient"),c(nrow(HC_te),nrow(Patient))))
dat$status_sch <- dat$status_trs <- dat$status_ssd <- rep(NA,nrow(dat))

dat$status_sch[test_dat$Data_ID %in% HC_te$Data_ID ] <- "Control"   
dat$status_sch[test_dat$Data_ID %in% SCZ$Data_ID ] <- "Schizophrenia"

dat$status_ssd[test_dat$Data_ID %in% HC_te$Data_ID  ] <- "Control"
dat$status_ssd[test_dat$Data_ID %in% SSD$Data_ID ] <- "FE-SSD"

dat$status_trs[test_dat$Data_ID %in% HC_te$Data_ID  ] <- "Control"
dat$status_trs[test_dat$Data_ID %in% TRS$Data_ID ] <- "TRS"



#mean sd
dat %>% dplyr::select(Brain_PAD,c_Brain_PAD,status_sch) %>% group_by(status_sch) %>% summarise_each(funs(mean,sd))
## # A tibble: 3 × 5
##   status_sch    Brain_PAD_mean c_Brain_PAD_mean Brain_PAD_sd c_Brain_PAD_sd
##   <chr>                  <dbl>            <dbl>        <dbl>          <dbl>
## 1 Control               -3.84             -1.62         6.72           6.63
## 2 Schizophrenia         -0.885             1.47         8.57           8.59
## 3 <NA>                  -0.315             1.34         4.51           4.20
dat %>% dplyr::select(Brain_PAD,c_Brain_PAD,status_ssd) %>% group_by(status_ssd) %>% summarise_each(funs(mean,sd))
## # A tibble: 3 × 5
##   status_ssd Brain_PAD_mean c_Brain_PAD_mean Brain_PAD_sd c_Brain_PAD_sd
##   <chr>               <dbl>            <dbl>        <dbl>          <dbl>
## 1 Control            -3.84             -1.62         6.72           6.63
## 2 FE-SSD             -0.378             1.38         7.74           7.51
## 3 <NA>               -0.981             1.49         8.22           8.30
dat %>% dplyr::select(Brain_PAD,c_Brain_PAD,status_trs) %>% group_by(status_trs) %>% summarise_each(funs(mean,sd))
## # A tibble: 3 × 5
##   status_trs Brain_PAD_mean c_Brain_PAD_mean Brain_PAD_sd c_Brain_PAD_sd
##   <chr>               <dbl>            <dbl>        <dbl>          <dbl>
## 1 Control             -3.84           -1.62          6.72           6.63
## 2 TRS                  1.49            4.06          9.05           9.12
## 3 <NA>                -1.36            0.810         7.72           7.65
var.test(dat$c_Brain_PAD~dat$status_sch)
## 
##  F test to compare two variances
## 
## data:  dat$c_Brain_PAD by dat$status_sch
## F = 0.59546, num df = 208, denom df = 195, p-value = 0.0002479
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4510721 0.7850743
## sample estimates:
## ratio of variances 
##          0.5954574
t.test(dat$c_Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$c_Brain_PAD by dat$status_sch
## t = -4.0379, df = 366.46, p-value = 6.572e-05
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -4.599511 -1.586765
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                   -1.619196                    1.473943
summary(res_aov <- aov(dat$c_Brain_PAD~dat$status_sch))
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## dat$status_sch   1    968   967.7   16.57 5.64e-05 ***
## Residuals      403  23532    58.4                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 37개의 관측치가 삭제되었습니다.
dat2 <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:209)
dat[replicated,]
##      ID Brain_PAD c_Brain_PAD  Status status_ssd status_trs    status_sch
## 281 T24 -2.105789  -0.5382714 Patient     FE-SSD        TRS Schizophrenia
## 345 T19  3.355400   4.6341198 Patient     FE-SSD        TRS Schizophrenia
dat2 <- rbind(dat2,dat[replicated,])
dat2[replicated,]$status_ssd <- NA
dat2[(nrow(dat2)-1):nrow(dat2),]$status_trs <- NA

table(dat2$status_trs)
## 
## Control     TRS 
##     209      46
table(dat2$status_ssd)
## 
## Control  FE-SSD 
##     209      72
dat2$status_ssd[is.na(dat2$status_ssd)] = dat2$status_trs[is.na(dat2$status_ssd)]
summary(res_aov <- aov(dat2$Brain_PAD~dat2$status_ssd))
##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## dat2$status_ssd   2   1419   709.5   13.26 2.91e-06 ***
## Residuals       324  17334    53.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 117개의 관측치가 삭제되었습니다.
pairwise.t.test(dat2$Brain_PAD,dat2$status_ssd, p.adj = "fdr")$p.value
##             Control    FE-SSD
## FE-SSD 9.231017e-04        NA
## TRS    3.247208e-05 0.1770119
pairwise.t.test(dat2$Brain_PAD,dat2$status_ssd, p.adj = "bonferroni")$p.value
##             Control    FE-SSD
## FE-SSD 1.846203e-03        NA
## TRS    3.247208e-05 0.5310358
# test
var.test(dat$Brain_PAD~dat$status_sch)
## 
##  F test to compare two variances
## 
## data:  dat$Brain_PAD by dat$status_sch
## F = 0.61388, num df = 208, denom df = 195, p-value = 0.000559
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4650246 0.8093580
## sample estimates:
## ratio of variances 
##           0.613876
t.test(dat$Brain_PAD~dat$status_sch)
## 
##  Welch Two Sample t-test
## 
## data:  dat$Brain_PAD by dat$status_sch
## t = -3.8379, df = 369.38, p-value = 0.000146
## alternative hypothesis: true difference in means between group Control and group Schizophrenia is not equal to 0
## 95 percent confidence interval:
##  -4.462363 -1.438791
## sample estimates:
##       mean in group Control mean in group Schizophrenia 
##                  -3.8355271                  -0.8849502
summary(res_aov <- aov(dat$Brain_PAD~dat$status_sch))
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## dat$status_sch   1    881   880.6   14.96 0.000128 ***
## Residuals      403  23725    58.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 37개의 관측치가 삭제되었습니다.
dat2 <- dat
replicated <- setdiff(which(!is.na(dat$status_trs) & !is.na(dat$status_ssd)),1:209)
dat[replicated,]
##      ID Brain_PAD c_Brain_PAD  Status status_ssd status_trs    status_sch
## 281 T24 -2.105789  -0.5382714 Patient     FE-SSD        TRS Schizophrenia
## 345 T19  3.355400   4.6341198 Patient     FE-SSD        TRS Schizophrenia
dat2 <- rbind(dat2,dat[replicated,])
dat2[replicated,]$status_ssd <- NA
dat2[(nrow(dat2)-1):nrow(dat2),]$status_trs <- NA

table(dat2$status_trs)
## 
## Control     TRS 
##     209      46
table(dat2$status_ssd)
## 
## Control  FE-SSD 
##     209      72
test_dat2 <- test_dat
test_dat2 <- rbind(test_dat2,test_dat[replicated,])
dat2$status_ssd[is.na(dat2$status_ssd)] = dat2$status_trs[is.na(dat2$status_ssd)]

summary(res_aov <- aov(dat2$Brain_PAD~dat2$status_ssd))
##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## dat2$status_ssd   2   1419   709.5   13.26 2.91e-06 ***
## Residuals       324  17334    53.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 결측으로 인하여 117개의 관측치가 삭제되었습니다.
pairwise.t.test(dat2$Brain_PAD,dat2$status_ssd, p.adj = "fdr")$p.value
##             Control    FE-SSD
## FE-SSD 9.231017e-04        NA
## TRS    3.247208e-05 0.1770119
pairwise.t.test(dat2$Brain_PAD,dat2$status_ssd, p.adj = "bonferroni")$p.value
##             Control    FE-SSD
## FE-SSD 1.846203e-03        NA
## TRS    3.247208e-05 0.5310358
# estimation
summary(lm(Brain_PAD~status_sch+test_dat$age+test_dat$Sex, data=dat))
## 
## Call:
## lm(formula = Brain_PAD ~ status_sch + test_dat$age + test_dat$Sex, 
##     data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.978  -3.564   0.068   4.121  51.626 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -3.3571776  0.8482260  -3.958 8.94e-05 ***
## status_schSchizophrenia  3.1237698  0.7774593   4.018 7.01e-05 ***
## test_dat$age            -0.0005630  0.0003416  -1.648    0.100    
## test_dat$Sex2            0.6974082  0.7980182   0.874    0.383    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.663 on 401 degrees of freedom
##   (결측으로 인하여 37개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.04291,    Adjusted R-squared:  0.03575 
## F-statistic: 5.993 on 3 and 401 DF,  p-value: 0.0005297
summary(lm(Brain_PAD~dat2$status_ssd+test_dat2$age+test_dat2$Sex, data=dat2))
## 
## Call:
## lm(formula = Brain_PAD ~ dat2$status_ssd + test_dat2$age + test_dat2$Sex, 
##     data = dat2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.122  -3.366  -0.122   3.276  51.126 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -2.3532822  0.8752706  -2.689  0.00755 ** 
## dat2$status_ssdFE-SSD  2.8081528  1.0134099   2.771  0.00591 ** 
## dat2$status_ssdTRS     5.7574097  1.2117429   4.751 3.05e-06 ***
## test_dat2$age         -0.0010847  0.0003669  -2.956  0.00334 ** 
## test_dat2$Sex2         0.4558583  0.8468851   0.538  0.59076    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.239 on 322 degrees of freedom
##   (결측으로 인하여 117개의 관측치가 삭제되었습니다.)
## Multiple R-squared:  0.1001, Adjusted R-squared:  0.08892 
## F-statistic: 8.955 on 4 and 322 DF,  p-value: 7.22e-07